Load Packages

library(lubridate)
library(tidyverse)
library(tidytext)
library(govtrackR)
library(scales)
library(ggtext)
library(gganimate)
library(hrbrthemes)
library(rtemis)
library(viridis)
library(ggrepel)
library(highcharter)
library(tidylo)
library(widyr)
library(gt)
library(tidygraph)
library(ggraph)
library(igraph)
library(d3r)
library(treemap)
library(sunburstR)
library(reactable)
library(skimr)
library(trelliscopejs)
library(glue)
options(highcharter.theme = hc_theme_hcrt(tooltip = list(valueDecimals = 2)))

Part I: Exploring Product Service Codes

Lets explore PSC’s to help us better understand the products and services the government procures.

tbl_psc <- dictionary_psc_active(only_active = T, snake_names = T)

Building Table to Explore PSCs

Lets build an an interactive table that lets us see all the PSCs

psc_tbl <- tbl_psc %>%
  select(
    is_active_psc,
    type_psc,
    name_solicitation_group,
    code_product_service,
    name_product_service,
    date_start,
    date_end,
    details_product_service_includes
  ) %>%
  reactable(
    filterable = T,
    resizable = T,
    searchable = T,
    showPageSizeOptions = T,
    defaultPageSize = 4,
    pageSizeOptions = c(5, 10, 20),
    sortable = T,
    compact = T
  )

Here it is

psc_tbl

Explore the Breakdown Between Products and Services

What is the breakdown between these two groups?

gg_psc_bkd <- 
  tbl_psc %>%
  count(type_psc, sort = T, name = "count") %>%
  mutate(type_psc = fct_reorder(type_psc, count)) %>%
  ggplot(aes(x = type_psc, y = count, fill = type_psc)) +
  geom_bar(stat = "identity", position = "dodge") +
  scale_fill_viridis(discrete = TRUE, name = "") +
  theme_ipsum() +
  ylab("Number of Product Service Codes") +
  xlab("") +
  ggtitle("Products Versus Services") +
  coord_flip()
gg_psc_bkd

New PSCs overtime

How often are new PSC’s added?

First, how many new PSC’s are there on a given date?

tbl_new_codes <-
  tbl_psc %>%
  group_by(type_psc, date_start) %>%
  summarise(count_added = n(), .groups = "drop")

Lets explore this interactively

hc_new_psc <-
  hchart(tbl_new_codes,
         "line",
         hcaes(x = date_start, y = count_added, group = type_psc)) %>%
  hc_title(text = "New Product Service Codes by Date Added") %>%
  hc_yAxis(title = list(text = "Count of New Codes")) %>%
  hc_xAxis(title = list(text = "Date Added"))
hc_new_psc

Lets make this look a bit better

hc_new_psc <-
  hc_new_psc %>%
  hc_add_theme(hc_theme_hcrt())
hc_new_psc

Build A Network Graph of the 10 Newest PSCs

Lets take a look at the 10 newest PSCs

tbl_10_new_psc <-
  tbl_psc %>%
  filter(!is_parent_psc) %>%
  arrange(desc(date_start)) %>%
  group_by(type_psc) %>%
  slice(1:10) %>%
  ungroup()
gt(tbl_10_new_psc)
is_parent_psc is_active_psc type_psc id_solicitation_group name_solicitation_group group_product_service code_product_service name_product_service name_psc details_psc date_start date_end name_product_service_full details_product_service_excludes details_product_service_notes details_product_service_includes count_items_included count_items_excluded count_days_active
FALSE TRUE PRODUCT 15 AEROSPACE CRAFT AND STRUCTURAL COMPONENTS 155 1555 SPACE VEHICLES SPACE VEHICLES NA 2015-10-01 NA NA NA THIS CLASS INCLUDES ONLY COMPLETE SPACE VEHICLES IN ASSEMBLED OR UNASSEMBLED FORM NA 0 0 1862
FALSE TRUE PRODUCT 16 AEROSPACE CRAFT COMPONENTS AND ACCESSORIES 167 1675 SPACE VEHICLE COMPONENTS SPACE VEHICLE COMPONENTS NA 2015-10-01 NA SPACE VEHICLE COMPONENTS REMOTE GUIDANCE EQUIPMENT NA STRUCTURAL COMPONENTS; COMPONENTS AND ACCESSORIES SPECIALLY DESIGNED FOR INSTALLATION IN OR ON SPACE VEHICLES; INTERNAL CONTROL SYSTEMS 2 0 1862
FALSE TRUE PRODUCT 16 AEROSPACE CRAFT COMPONENTS AND ACCESSORIES 167 1677 SPACE VEHICLE REMOTE CONTROL SYSTEMS SPACE VEHICLE REMOTE CONTROL SYSTEMS NA 2015-10-01 NA SPACE VEHICLE REMOTE CONTROL SYSTEMS INTERNAL CONTROL SYSTEMS; COMPONENTS DESIGNED FOR USE WITH BOTH GUIDED MISSILE AND SPACE VEHICLE SYSTEMS NA SPECIFICALLY DESIGNED COMPONENTS OF SPACE VEHICLE REMOTE CONTROL SYSTEMS 0 1 1862
FALSE TRUE PRODUCT 17 AEROSPACE CRAFT LAUNCHING, LANDING, GROUND HANDLING AND SERVICING EQUIPMENT 172 1725 SPACE VEHICLE LAUNCHERS SPACE VEHICLE LAUNCHERS NA 2015-10-01 NA SPACE VEHICLE LAUNCHERS LAUNCHERS USED WITH BOTH GUIDED MISSILE AND SPACE VEHICLES NA LAUNCHERS SPECIFICALLY DESIGNED FOR SPACE VEHICLES 0 0 1862
FALSE TRUE PRODUCT 17 AEROSPACE CRAFT LAUNCHING, LANDING, GROUND HANDLING AND SERVICING EQUIPMENT 173 1735 SPACE VEHICLE HANDLING AND SERVICING EQUIPMENT SPACE VEHICLE HANDLING AND SERVICING EQUIPMENT NA 2015-10-01 NA SPACE VEHICLE HANDLING AND SERVICING EQUIPMENT EQUIPMENT USED IN HANDLING OR SERVICING BOTH GUIDED MISSILES AND SPACE VEHICLES THIS CLASS DOES NOT INCLUDE SPACE VEHICLE AERIAL RECOVERY SYSTEMS WHICH ARE CLASSIFIED IN CLASS 1670 SPECIALLY DESIGNED TRUCKS AND TRAILERS FOR SUE IN TRANSPORTING SPACE VEHICLES; SPECIALLY DESIGNED SLINGS, HOISTS, JACKS, BLOWERS, SELF-PROPELLED VEHICLES, SPECIALLY DESIGNED FOR SPACE VEHICLE HANDLING OR SERVICING; COVERS, SPACE VEHICLE 2 0 1862
FALSE TRUE PRODUCT 70 INFORMATION TECHNOLOGY EQUIPMENT SOFTWARE,SUPPLIES& SUPPORT EQUIPMENT 701 7010 INFORMATION TECHNOLOGY EQUIPMENT SYSTEM CONFIGURATION INFORMATION TECHNOLOGY EQUIPMENT SYSTEM CONFIGURATION NA 2015-10-01 NA INFORMATION TECHNOLOGY EQUIPMENT SYSTEM CONFIGURATION NA A GROUP OF GENERAL PURPOSE ANALOG, DIGITAL, OR HYBRID ELECTRONIC OR ELECTROMECHANICAL DEVICES THAT ARE INTERCONNECTED TO OPERATE AS A SYSTEM FREQUENTLY REFERRED TO AS AN INFORMATION TECHNOLOGY SYSTEM OR AUTOMATED DATA PROCESSING SYSTEM ALTHOUGH THE TERM SYSTEM IS NOT CONSIDERED DEFINITIVE INCLUDES AN ASSEMBLY OF DEVICES CONSISTING OF A CENTRAL PROCESSING UNIT AND THE NECESSARY INPUT/OUTPUT DEVICES, ACCESSORIAL DEVICES, ANALOG MEASUREMENT DEVICES, SOFTWARE AND/OR FIRMWARE REQUIRED TO PERFORM THE DESIRED OBJECTIVES ALSO INCLUDES SUB-ASSEMBLIES AND UNITS IN WHICH INPUT/OUTPUT DEVICES, CPUS, AND/OR ACCESSORIAL DEVICES OR COMPONENTS ARE INCORPORATED INTO A SINGLE ASSEMBLY OR UNIT A SPECIALLY DESIGNED DEVICE INCORPORATED INTO A SYSTEM SHALL NOT INFLUENCE THE CLASSIFICATION OF THE BASE ASSEMBLY OR UNIT NA 0 0 1862
FALSE TRUE PRODUCT 70 INFORMATION TECHNOLOGY EQUIPMENT SOFTWARE,SUPPLIES& SUPPORT EQUIPMENT 702 7020 INFORMATION TECHNOLOGY CENTRAL PROCESSING UNIT , ANALOG INFORMATION TECHNOLOGY CENTRAL PROCESSING UNIT , ANALOG NA 2015-10-01 NA INFORMATION TECHNOLOGY CENTRAL PROCESSING UNIT , ANALOG NA AN ANALOG IS A REPRESENTATION IN ONE FORM OF A PHYSICAL CONDITION EXISTING IN ANOTHER FORM THIS CLASS INCLUDES ONLY CPUS THAT ACCEPT AS INPUTS THE ELECTRICAL EQUIVALENT OF PHYSICAL CONDITIONS SUCH AS FLOW, TEMPERATURE, PRESSURE, ANGULAR POSITION OR VOLTAGE AND PERFORM COMPUTATIONS BY MANIPULATING THESE ELECTRICAL EQUIVALENTS TO PRODUCE RESULTS FOR FURTHER USE NA 0 0 1862
FALSE TRUE PRODUCT 70 INFORMATION TECHNOLOGY EQUIPMENT SOFTWARE,SUPPLIES& SUPPORT EQUIPMENT 702 7021 INFORMATION TECHNOLOGY CENTRAL PROCESSING UNIT , DIGITAL INFORMATION TECHNOLOGY CENTRAL PROCESSING UNIT , DIGITAL NA 2015-10-01 NA INFORMATION TECHNOLOGY CENTRAL PROCESSING UNIT , DIGITAL NA DIGITAL REFERS TO THE REPRESENTATION OF DISCRETE NUMBERS, SYMBOLS AND ALPHABETIC CHARACTERS BY A PREDETERMINED, CODED COMBINATION OF ELECTRICAL IMPULSES THIS CLASS INCLUDES ONLY CPUS THAT ACCEPT INFORMATION REPRESENTED BY DIGITAL IMPULSES SPECIFICALLY, A DEVICE CAPABLE OF PERFORMING SEQUENCES OF ARITHMETIC AND LOGIC OPERATIONS NOT ONLY ON DATA BUT ALSO ON THE PROGRAM WHICH IS CONTAINED IN ITS INTERNAL MEMORY WITHOUT INTERVENTION OF AN OPERATOR NA 0 0 1862
FALSE TRUE PRODUCT 70 INFORMATION TECHNOLOGY EQUIPMENT SOFTWARE,SUPPLIES& SUPPORT EQUIPMENT 702 7022 INFORMATION TECHNOLOGY CENTRAL PROCESSING UNIT , HYBRID INFORMATION TECHNOLOGY CENTRAL PROCESSING UNIT , HYBRID NA 2015-10-01 NA INFORMATION TECHNOLOGY CENTRAL PROCESSING UNIT , HYBRID NA HYBRID REFERS TO A COMBINATION OF ANALOG AND DIGITAL CAPABILITY AS DEFINED IN CLASSES 7020 AND 7021 WITH CONVERSION CAPABILITY REQUIRED FOR INTERCOMMUNICATION NA 0 0 1862
FALSE TRUE PRODUCT 70 INFORMATION TECHNOLOGY EQUIPMENT SOFTWARE,SUPPLIES& SUPPORT EQUIPMENT 702 7025 INFORMATION TECHNOLOGY INPUT/OUTPUT AND STORAGE DEVICES INFORMATION TECHNOLOGY INPUT/OUTPUT AND STORAGE DEVICES NA 2015-10-01 NA INFORMATION TECHNOLOGY INPUT/OUTPUT AND STORAGE DEVICES NA THIS CLASS INCLUDES DEVICES USED TO CONTROL TRANSFER INFORMATION TO AND FROM A COMPUTER THE INPUT DEVICE IS USED FOR TRANSFERRING DATA AND INSTRUCTIONS INTO A COMPUTER THE OUTPUT DEVICE IS USED TO TRANSFER RESULTS OF PROCESSING BY THE COMPUTER TO THE INFORMATION TECHNOLOGY OR ADP PERIPHERAL DEVICES INPUT/OUTPUT DEVICES COMBINE THE ABOVE FUNCTIONS IN THE SAME DEVICE THIS CLASS INCLUDES PRINTERS, DISPLAY UNITS, DISK DRIVE UNITS , TAPE DRIVE UNITS, TERMINALS, DATA ENTRY DEVICES AND TRANSFER UNITS ALSO INCLUDES OPTICAL COMPACT DISK DEVICES USED FOR THE STORAGE AND RETRIEVAL OF DATA AND FIRMWARE NA 0 0 1862
FALSE TRUE SERVICE M OPERATION OF GOVT OWNED FACILITY M2A M2AA HUSBANDING SERVICES, COMMUNICATIONS SERVICES HUSBANDING SERVICES, COMMUNICATIONS SERVICES NA 2020-03-17 NA HUSBANDING SERVICES, COMMUNICATIONS SERVICES NA NA INCLUDES LANDLINES, MOBILE PHONES, AND SIM CARDS 0 0 233
FALSE TRUE SERVICE M OPERATION OF GOVT OWNED FACILITY M2A M2AB HUSBANDING SERVICES, FORCE PROTECTION HUSBANDING SERVICES, FORCE PROTECTION NA 2020-03-17 NA HUSBANDING SERVICES, FORCE PROTECTION NA NA INCLUDES SECURITY GUARDS, DEMARCATION FLOATING PERIMETERS, PICKET BOATS, METAL DETECTORS, X-RAY MACHINE, EOD DIVERS, AND BARRIERS 0 0 233
FALSE TRUE SERVICE M OPERATION OF GOVT OWNED FACILITY M2A M2AC HUSBANDING SERVICES, REMOVAL SERVICES HUSBANDING SERVICES, REMOVAL SERVICES NA 2020-03-17 NA HUSBANDING SERVICES, REMOVAL SERVICES NA NA INCLUDES SEWAGE, OILY WASTE AND TRASH 0 0 233
FALSE TRUE SERVICE M OPERATION OF GOVT OWNED FACILITY M2A M2AD HUSBANDING SERVICES, MATERIAL HANDLING HUSBANDING SERVICES, MATERIAL HANDLING NA 2020-03-17 NA HUSBANDING SERVICES, MATERIAL HANDLING NA NA INCLUDES CUSTOMS CLEARING, RECORDING, EXPORTING, TRANSPORTATION, AND MAIL DELIVERY 0 0 233
FALSE TRUE SERVICE M OPERATION OF GOVT OWNED FACILITY M2A M2AE HUSBANDING SERVICES, PURCHASING SERVICES HUSBANDING SERVICES, PURCHASING SERVICES NA 2020-03-17 NA HUSBANDING SERVICES, PURCHASING SERVICES NA NA INCLUDES INCIDENTALS, LOCAL PROCUREMENT, AND PROVISIONS 0 0 233
FALSE TRUE SERVICE M OPERATION OF GOVT OWNED FACILITY M2A M2AF HUSBANDING SERVICES, INCIDENTAL SERVICES HUSBANDING SERVICES, INCIDENTAL SERVICES NA 2020-03-17 NA HUSBANDING SERVICES, INCIDENTAL SERVICES NA NA INCLUDES MONEY EXCHANGE, LAUNDRY, INTERPRETER SERVICES, PAINT LIGHTER, TENTS 0 0 233
FALSE TRUE SERVICE M OPERATION OF GOVT OWNED FACILITY M2B M2BA HUSBANDING SERVICES, TRANSPORTATION SERVICES HUSBANDING SERVICES, TRANSPORTATION SERVICES NA 2020-03-17 NA HUSBANDING SERVICES, TRANSPORTATION SERVICES NA NA INCLUDES CARS, MINI VANS/BUSES, WATER TAXIS & BOATS, CARGO VANS & TRUCKS, NAVIGATION 0 0 233
FALSE TRUE SERVICE M OPERATION OF GOVT OWNED FACILITY M2B M2BB HUSBANDING SERVICES, FUEL SERVICES HUSBANDING SERVICES, FUEL SERVICES NA 2020-03-17 NA HUSBANDING SERVICES, FUEL SERVICES NA NA INCLUDES FUEL HANDLING, DELIVERY, AND LUBRICANTS 0 0 233
FALSE TRUE SERVICE M OPERATION OF GOVT OWNED FACILITY M2B M2BZ HUSBANDING SERVICES, OTHER PORT SERVICES HUSBANDING SERVICES, OTHER PORT SERVICES NA 2020-03-17 NA HUSBANDING SERVICES, OTHER PORT SERVICES NA NA INCLUDES PILOTS, TUGS, BROWS/FENDERS, BARGES, BERTHING, LINE HANDLERS, PORTABLE WATER, CRANES, FORKLIFTS, CUSTOMS AND IMMIGRATION, QUARANTINE, PORT ENTRY, CLEARANCE, AND FLEET LANDING 0 0 233
FALSE TRUE SERVICE M OPERATION OF GOVT OWNED FACILITY M2C M2CA SHIP HUSBANDING SERVICES, MANAGEMENT/INTEGRATION SERVICE SHIP HUSBANDING SERVICES, MANAGEMENT/INTEGRATION SERVICE NA 2020-03-17 NA SHIP HUSBANDING SERVICES, MANAGEMENT/INTEGRATION SERVICE NA NA INCLUDES MANAGEMENT BY A HUSBANDING SERVICES PROVIDER OR HUSBANDING SERVICE AGENT TO PROVIDE AN INTEGRATED MULTI-SERVICES SOLUTION FOR PORT SERVICES 0 0 233

Build a Network Graph of these New PSCs

Lets explore the connections of these 20 newest PSCs

hc_new_psc_node_graph <-
  tbl_10_new_psc %>%
  select(name_solicitation_group, name_product_service) %>%
  as_tbl_graph() %>%
  hchart() %>%
  hc_title(text = "New Product Service Codes Node Graph") %>%
  hc_add_theme(hc_theme_darkunica()) %>%
  hc_xAxis(visible = FALSE) %>%
  hc_yAxis(visible = FALSE)
hc_new_psc_node_graph

Part II: FPDS CSV

This section explores fpds_csv which provides real time access into the FPDS csv interface.

args(fpds_csv)

Acquire All Procurements of Pyrotechnics

tbl_pyro <-
  fpds_csv(product_or_service_code = "1370", snake_names = T)

Explore the Data Structure and Values

This is a great practice for exploring data

glimpse(tbl_pyro)

Rows: 6,118 Columns: 43 $ id_contract “16680197810D0001N0016477C0003”, “166… $ date_obligation 1978-10-15, 1978-11-15, 1978-11-15, … $ id_contract_analysis ”16680197810D0001N0016477C0003“,”166… $ type_procurement “AWARD”, “AWARD”, “AWARD”, “AWARD”, “… $ id_contract_idv ”N0016477C0003“,”N0016477C0028“,”N0… $ code_modification “0”, “0”, “0”, “0”, “0”, “0”, “0”, “0… $ number_transaction 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0… $ type_award ”DCA DEFINITIVE CONTRACT“,”DCA DEFIN… $ amount_obligation 19000, 17000, 24000, 44000, 2739000, … $ date_solicitation NA, NA, NA, NA, NA, NA, NA, NA, NA, … $ id_agency_award “1700”, “1700”, “1700”, “1700”, “2100… $ name_agency_award ”DEPARTMENT OF THE NAVY“,”DEPARTMENT… $ name_office_award “NAVAL WEAPONS SUPPLY CENTER”, “NAVAL… $ type_product_or_service ”P“,”P“,”P“,”P“,”P“,”P“,”P“,”P… $ code_product_service “1370”, “1370”, “1370”, “1370”, “1370… $ id_naics NA, NA, NA, NA, NA, NA, NA, NA, NA, N… $ name_naics NA, NA, NA, NA, NA, NA, NA, NA, NA, N… $ id_duns NA, NA, NA, NA, NA, NA, NA, NA, NA, N… $ name_vendor ”S.N.C. SCIONTI“,”S.N.C. SCIONTI“,”… $ city_vendor NA, NA, NA, NA, NA, NA, NA, NA, NA, N… $ code_state_vendor NA, NA, NA, NA, NA, NA, NA, NA, NA, N… $ zipcode_vendor NA, NA, NA, NA, NA, NA, NA, NA, NA, N… $ id_duns_parent NA, NA, NA, NA, NA, NA, NA, NA, NA, N… $ name_vendor_parent “S.N.C. SCIONTI”, “S.N.C. SCIONTI”, “… $ code_additional_reporting NA, NA, NA, NA, NA, NA, NA, NA, NA, N… $ description_additonal_reporting NA, NA, NA, NA, NA, NA, NA, NA, NA, N… $ url_csv https://www.fpds.gov/ezsearch/fpdspo… $ year_fiscal_obligation 1978, 1978, 1978, 1978, 1978, 1978, 1… $ has_duns_parent FALSE, FALSE, FALSE, FALSE, FALSE, FA… $ is_missing_duns TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, T… $ id_department_award “9700”, “9700”, “9700”, “9700”, “9700… $ name_department_award ”DEPARTMENT OF DEFENSE“,”DEPARTMENT … $ code_department_award 97, 97, 97, 97, 97, 97, 97, 97, 97, 9… $ type_contract_id_analysis “CONTRACT”, “CONTRACT”, “CONTRACT”, “… $ id_cgac_award 17, 17, 17, 17, 21, 21, 21, 21, 17, 2… $ name_agency_cgac_award ”DEPARTMENT OF THE NAVY“,”DEPARTMENT… $ slug_cgac_award “017”, “017”, “017”, “017”, “021”, “0… $ type_psc ”PRODUCT“,”PRODUCT“,”PRODUCT“,”PRO… $ name_product_service “PYROTECHNICS”, “PYROTECHNICS”, “PYRO… $ id_solicitation_group ”13“,”13“,”13“,”13“,”13“,”13“,”… $ name_solicitation_group “AMMUNITION AND EXPLOSIVES”, "AMMUNIT… $ has_parent NA, NA, NA, NA, NA, NA, NA, NA, NA, N… $ is_idv TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, T…

skim(tbl_pyro)
Data summary
Name tbl_pyro
Number of rows 6118
Number of columns 43
_______________________
Column type frequency:
character 29
Date 2
logical 4
numeric 8
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
id_contract 0 1.00 9 34 0 2477 0
id_contract_analysis 0 1.00 9 34 0 2475 0
type_procurement 0 1.00 5 5 0 1 0
id_contract_idv 3320 0.46 9 15 0 510 0
code_modification 0 1.00 1 21 0 222 0
type_award 0 1.00 17 51 0 10 0
id_agency_award 0 1.00 4 4 0 30 0
name_agency_award 112 0.98 14 45 0 28 0
name_office_award 23 1.00 3 53 0 254 0
type_product_or_service 0 1.00 1 1 0 1 0
code_product_service 0 1.00 4 4 0 1 0
name_naics 1548 0.75 7 118 0 103 0
name_vendor 0 1.00 4 63 0 623 0
city_vendor 190 0.97 4 18 0 396 0
code_state_vendor 389 0.94 1 9 0 57 0
zipcode_vendor 225 0.96 4 5 0 473 0
name_vendor_parent 224 0.96 4 52 0 432 0
code_additional_reporting 5817 0.05 1 4 0 2 0
description_additonal_reporting 5817 0.05 17 45 0 2 0
url_csv 0 1.00 169 169 0 1 0
id_department_award 343 0.94 4 4 0 10 0
name_department_award 0 1.00 19 45 0 11 0
type_contract_id_analysis 0 1.00 3 8 0 2 0
name_agency_cgac_award 0 1.00 19 45 0 14 0
slug_cgac_award 0 1.00 3 3 0 14 0
type_psc 0 1.00 7 7 0 1 0
name_product_service 0 1.00 12 12 0 1 0
id_solicitation_group 0 1.00 2 2 0 1 0
name_solicitation_group 0 1.00 25 25 0 1 0

Variable type: Date

skim_variable n_missing complete_rate min max median n_unique
date_obligation 0 1.00 1978-10-15 2020-09-23 2008-11-25 2942
date_solicitation 5934 0.03 2013-04-11 2020-08-23 2018-08-24 54

Variable type: logical

skim_variable n_missing complete_rate mean count
has_duns_parent 0 1.00 0.97 TRU: 5938, FAL: 180
is_missing_duns 0 1.00 0.03 FAL: 5938, TRU: 180
has_parent 180 0.97 0.47 FAL: 3143, TRU: 2795
is_idv 0 1.00 0.46 FAL: 3320, TRU: 2798

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
number_transaction 0 1.00 0.44 2.01 0 0 0.00 0 17 ▇▁▁▁▁
amount_obligation 0 1.00 841978.71 3477601.15 -37099346 0 6699.57 145000 77351610 ▁▇▁▁▁
id_naics 1543 0.75 340155.69 69999.03 114210 325998 325998.00 332993 926140 ▁▇▁▁▁
id_duns 180 0.97 186291076.98 278747509.25 1007595 7031479 52814121.00 151954310 999195803 ▇▁▁▁▁
id_duns_parent 180 0.97 248556355.70 275090104.48 1007595 43190826 149528882.00 217304393 999195803 ▇▃▁▂▁
year_fiscal_obligation 0 1.00 2005.76 10.44 1978 2001 2008.00 2013 2020 ▂▂▂▇▇
code_department_award 343 0.94 90.59 19.99 12 97 97.00 97 97 ▁▁▁▁▇
id_cgac_award 0 1.00 28.27 19.99 12 17 21.00 21 97 ▇▁▁▁▁

What is the Total Spend?

First thing to look at is how much we have spent on these products.

tbl_by_day <- 
  tbl_pyro %>%
  group_by(date_obligation) %>%
  summarise(amount = sum(amount_obligation), .groups = "drop")

Lets take a look at the data

tbl_by_day %>% sample_n(3) %>% munge_data() %>% gt()
date_obligation amount
2011-02-24 $3,915,256
2011-03-29 $0
2011-11-23 $0

Dealing with $0 Obligations

tbl_by_day %>% filter(amount == 0) %>% nrow()

[1] 879

Lets filter them out

tbl_by_day <- tbl_by_day %>% filter(amount != 0)

Explore Outlays by Day

hc_by_day <-
  tbl_by_day %>%
  hchart("spline",
         hcaes(x = date_obligation, y = amount)) %>%
  hc_title(text = "Pyrotechnic Spend By Day Spend by Date") %>%
  hc_yAxis(title = list(text = "Amount Obligated")) %>%
  hc_xAxis(title = list(text = "Date Added")) %>%
  hc_add_theme(hc_theme_elementary())

What is the Cumulative Spend?

Lets add a cumulative total

tbl_by_day <-
  tbl_by_day %>%
  mutate(amount_cumulative = cumsum(amount))

Now lets explore it statically

gg_area_pyro <-
  tbl_by_day %>%
  ggplot(aes(date_obligation, amount_cumulative)) +
  geom_area(fill = "#22908C", alpha = .5) +
  scale_fill_viridis(discrete = TRUE) +
  scale_y_continuous(labels = scales::dollar) +
  scale_x_date() +
  theme(legend.position = "none") +
  theme_ipsum() +
  labs(title = "Cumulative Federal Procurement on Pyrotechnics", x = "Date", y = "Cumulative Procurement Spend")
gg_area_pyro

Lets add some model fits

gg_area_pyro <- 
  gg_area_pyro +
  geom_smooth(method = "lm") +
  geom_smooth(method = "loess", color = "black") 
gg_area_pyro

Do these Procurments Exhibit Seasonality?

Often products and services tend to be procured in September at the end of the budget year, does this product exhibit that trend?

Lets build a table that gives us the inputs

govt_months <- 
  c(
    "Oct",
    "Nov",
    "Dec",
    "Jan",
    "Feb",
    "Mar",
    "Apr",
    "May",
    "Jun",
    "Jul",
    "Aug",
    "Sep"
  )

tbl_monthly_pyro <-
  tbl_pyro %>%
  mutate(month_obligation = lubridate::month(date_obligation, label = T)) %>%
  count(year_fiscal_obligation,
        month_obligation,
        wt = amount_obligation,
        name = "amount")

## Set the factor levels to budget months

tbl_monthly_pyro <- 
  tbl_monthly_pyro %>%
  mutate(month_obligation = factor(
    month_obligation,
    levels = govt_months,
    ordered = T
  ))

Lets turn this into an interactive heatmap.

fntltp <- JS("function(){
  return this.point.x + ' ' +  this.series.yAxis.categories[this.point.y] + ': ' +
  Highcharts.numberFormat(this.point.value, 2);
}")


hc_pyro_hm <-
  hchart(
    tbl_monthly_pyro,
    "heatmap",
    hcaes(x = year_fiscal_obligation,
          y = month_obligation,
          value = amount)
  ) %>%
  hc_colorAxis(
    stops = color_stops(20, colors = scales::viridis_pal(option = "B")(20)),
    # fuerza a utilzar mejor el espectro de colores para que HJS no amplie el
    # eje para tener numero "redondos
    startOnTick = FALSE,

    endOnTick =  FALSE,
    reversed = T
  ) %>%
  hc_yAxis(
    title = list(text = ""),
    reversed = TRUE,
    offset = -20,
    tickLength = 0,
    gridLineWidth = 0,
    minorGridLineWidth = 0,
    labels = list(style = list(fontSize = "9px"))
  ) %>%
  hc_tooltip(formatter = fntltp) %>%
  hc_title(text = "Spend by Month and Fiscal Year") %>%
  hc_legend(
    layout = "horizontal",
    verticalAlign = "top",
    align = "left",
    valueDecimals = 0
  ) %>%
  hc_add_theme(hc_theme_darkunica())
hc_pyro_hm

Looks like it fits the usual trend but can we actually quantify it?

Lets do a basic liner model exploring this.

mod <-
  rtemis::s.LM(x = tbl_monthly_pyro$month_obligation, y = tbl_monthly_pyro$amount, intercept = F)

[2020-11-05 16:14:51 rtemis::s.LM] Hello, alexbresler

[[ Regression Input Summary ]] Training features: 497 x 1 Training outcome: 497 x 1 Testing features: Not available Testing outcome: Not available

[2020-11-05 16:14:51 rtemis::s.LM] Training linear model…

[[ LM Regression Training Summary ]] MSE = 2.7e+14 (8.55%) RMSE = 1.6e+07 (4.37%) MAE = 1e+07 (4.84%) r = 0.29 (p = 3e-11) rho = 0.33 (p = 4.1e-14) R sq = 0.09 [2020-11-05 16:14:51 rtemis::s.LM] Run completed in 0.01 minutes (Real: 0.61; User: 0.38; System: 0.08)

Now lets explore the variable importance!

var_imp <- mod$varimp
tbl_coef <- 
  tibble(month = names(var_imp), amount_coef = var_imp) %>%
  arrange(desc(amount_coef)) %>%
  munge_data()

Let’s see

gt(tbl_coef)
month amount_coef
XSEP $21,731,668
XMAR $18,487,441
XAPR $12,047,660
XJUN $11,315,741
XAUG $10,117,001
XMAY $9,738,273
XFEB $9,336,068
XJUL $8,661,087
XJAN $7,656,582
XDEC $6,705,743
XNOV $4,033,896

Spend by Department

Lets take a look at spend by department.

tbl_depts <-
  tbl_pyro %>%
  count(
    name_department_award,
    wt = amount_obligation,
    name = "amount",
    sort = T
  ) %>%
  mutate(name_department_award = fct_reorder(name_department_award, amount))
tbl_depts %>% munge_data() %>% gt()
name_department_award amount
DEPARTMENT OF DEFENSE $5,112,237,946
NATIONAL AERONAUTICS AND SPACE ADMINISTRATION $23,919,053
GENERAL SERVICES ADMINISTRATION $7,093,773
DEPARTMENT OF THE INTERIOR $3,036,645
DEPARTMENT OF AGRICULTURE $2,489,224
DEPARTMENT OF TRANSPORTATION $995,000
DEPARTMENT OF STATE $662,286
DEPARTMENT OF ENERGY $293,108
DEPARTMENT OF HOMELAND SECURITY $241,504
DEPARTMENT OF JUSTICE $213,213
DEPARTMENT OF VETERANS AFFAIRS $44,000

Now we can visualize it.

gg_spend <-
  tbl_depts %>%
  mutate(amount_millions = amount /1000000) %>% 
  ggplot(aes(x = name_department_award, y = amount_millions)) +
  geom_segment(
    aes(
      x = name_department_award ,
      xend = name_department_award,
      y = 0,
      yend = amount_millions
    ),
    color = "grey"
  ) +
  geom_point(size = 3, color = "#69b3a2") +
  coord_flip() +
  theme_ipsum() +
  theme(
    legend.position = "none",
    panel.grid.minor.y = element_blank(),
    panel.grid.major.y = element_blank(),
    text = element_text(size = 7)
  ) +
  xlab("") +
  scale_y_log10(labels = scales::dollar, n.breaks = 10) +
  labs(title = "Which Departments Purchased the Explosives?",
       x = "",
       y = "Procurement $ in milions (log10 transformed)")
gg_spend

Contract Size by Agency Group

Now lets look at contract size against agency group.

tbl_pyro %>%
  filter(amount_obligation > 0) %>% count(name_agency_cgac_award,
                                          wt = amount_obligation,
                                          sort = T,
                                          name = "amount") %>%
  munge_data() %>%
  gt()
name_agency_cgac_award amount
DEPARTMENT OF THE ARMY $3,157,566,337
DEPARTMENT OF THE NAVY $1,288,027,486
DEPARTMENT OF THE AIR FORCE $730,536,689
DEPARTMENT OF DEFENSE $89,065,356
NATIONAL AERONAUTICS AND SPACE ADMINISTRATION $25,393,388
GENERAL SERVICES ADMINISTRATION $7,093,773
DEPARTMENT OF THE INTERIOR $3,037,345
DEPARTMENT OF AGRICULTURE $2,514,745
DEPARTMENT OF TRANSPORTATION $995,000
DEPARTMENT OF STATE $662,738
DEPARTMENT OF ENERGY $293,108
DEPARTMENT OF HOMELAND SECURITY $242,212
DEPARTMENT OF JUSTICE $219,532
DEPARTMENT OF VETERANS AFFAIRS $44,000

Looks like it is really skewed to a few agencies. Perfect situation to lump some of the groups

tbl_agency_sum <- 
  tbl_pyro %>%
  filter(amount_obligation > 0) %>%
  mutate(
    name_agency_cgac_award = name_agency_cgac_award %>% fct_lump(8, w = amount_obligation, other_level = "OTHER 6 AGENCIES")
  ) %>%
  group_by(name_agency_cgac_award, id_contract_analysis) %>%
  summarise(amount = sum(amount_obligation, na.rm = T)) %>%
  ungroup() %>%
  filter(amount > 1000) %>%
  mutate(name_agency_cgac_award = fct_reorder(name_agency_cgac_award, amount)) 

Now we can explore the range using a boxplot and even layer in the volume with a scatter plot!

gg_contract <- 
  tbl_agency_sum %>%
  ggplot(aes(
    x = factor(name_agency_cgac_award),
    y = amount,
    fill = name_agency_cgac_award
  )) +
  geom_boxplot() +
  geom_jitter(color = "black",
              size = 0.3,
              alpha = .5)  +
  scale_fill_viridis(discrete = TRUE, alpha = .5) +
  theme_ipsum() +
  scale_y_log10(labels = scales::dollar) +
  theme(legend.position = "none",
        plot.title = element_text(size = 11)) +
  labs(
    title = "Distrubition of Award Size by Agency Group",
    x = "",
    y = "",
    credits = "Awards over $1000"
  ) +
  coord_flip()
gg_contract

How Do We Procure Pyrotechnics

Next lets see how the government procures this product and visualize it in a treemap.

Lets set the treemap parameters

lvl_opts <-  list(
  list(
    level = 1,
    borderWidth = 0,
    borderColor = "transparent",
    dataLabels = list(
      enabled = TRUE,
      align = "left",
      verticalAlign = "top",
      style = list(
        fontSize = "12px",
        textOutline = FALSE,
        color = "white"
      )
    )
  ),
  list(
    level = 2,
    borderWidth = 0,
    borderColor = "transparent",
    colorVariation = list(key = "brightness", to = 0.250),
    dataLabels = list(enabled = T),
    style = list(
      fontSize = "8px",
      textOutline = FALSE,
      color = "white"
    )
  )
)

Now we can build the treemap

hc_treemap_spend <-
  tbl_pyro %>%
  count(name_department_award, type_award, sort = T) %>%
  highcharter::data_to_hierarchical(
    group_vars = c("name_department_award", "type_award"),
    size_var = "n"
  ) %>%
  hchart(
    type = "treemap",
    levels = lvl_opts,
    tooltip = list(valueDecimals = FALSE)
  ) %>%
  hc_add_theme(hc_theme_superheroes())
hc_treemap_spend

Deepive: Pyrotechnic Vendors

Next lets take a look at the companies that supply this product.

First thing we want to do is build a summary table that lets us explore the data.

tbl_vendors <- 
  tbl_pyro %>%
  group_by(id_duns) %>%
  summarise(
    name_vendor = name_vendor[which.max(amount_obligation)],
    date_first_award = min(date_obligation, na.rm = T),
    date_recent_award = max(date_obligation, na.rm = T),
    count_actions = n(),
    count_contracts = n_distinct(id_contract_analysis, na.rm = T),
    amount_contracts = sum(amount_obligation),
    count_departments = n_distinct(name_department_award, na.rm = T),
    count_agencies = n_distinct(name_agency_cgac_award, na.rm = T)
  ) %>%
  arrange(desc(amount_contracts))

Who are the top 10 vendors?

tbl_vendors %>%
  slice(1:10) %>%
  munge_data() %>%
  gt()
id_duns name_vendor date_first_award date_recent_award count_actions count_contracts amount_contracts count_departments count_agencies
2341824 ALLOY SURFACES COMPANY, INC. 1990-09-15 2020-06-17 543 76 $1,136,321,359 1 4
128342156 ARMTEC COUNTERMEASURES COMPANY 2003-09-18 2020-07-23 499 44 $691,846,016 1 4
18243985 KILGORE FLARES COMPANY LLC 2007-03-30 2020-07-14 419 26 $621,649,431 1 4
824862254 KILGORE FLARES COMPANY LLC 1995-06-15 2009-03-23 160 35 $414,372,086 1 3
2241164 ATK THIOKOL INC 1997-06-02 2018-09-27 186 26 $317,356,699 1 4
7031479 SECURITY SIGNALS, INC 1981-12-15 2020-09-23 329 55 $264,515,231 1 2
611068453 ARMTEC DEFENSE PRODUCTS CO INC 2002-10-04 2020-07-30 112 9 $221,700,572 1 3
52814121 PYROTECHNIQUE BY GRUCCI, INC 2002-08-08 2020-05-20 246 9 $173,039,332 1 1
47966593 MARTIN ELECTRONICS INC 1982-07-15 2020-06-30 237 59 $136,776,436 1 4
7020159 KILGORE CORPORATION 1980-11-15 1995-03-15 75 74 $125,514,000 2 4

Lets skim the data

skim(tbl_vendors)
Data summary
Name tbl_vendors
Number of rows 499
Number of columns 9
_______________________
Column type frequency:
character 1
Date 2
numeric 6
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
name_vendor 0 1 4 63 0 463 0

Variable type: Date

skim_variable n_missing complete_rate min max median n_unique
date_first_award 0 1 1978-10-15 2020-08-12 2002-04-17 400
date_recent_award 0 1 1979-12-15 2020-09-23 2005-08-30 429

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
id_duns 1 1 218034258.18 292070392.45 1007595 27759418 86011125.5 193993510.8 999195803 ▇▁▁▁▁
count_actions 0 1 12.26 47.50 1 1 2.0 5.5 543 ▇▁▁▁▁
count_contracts 0 1 5.00 15.40 1 1 1.0 3.0 249 ▇▁▁▁▁
amount_contracts 0 1 10323097.70 72319422.98 -444000 20794 82407.2 455000.0 1136321359 ▇▁▁▁▁
count_departments 0 1 1.08 0.32 1 1 1.0 1.0 4 ▇▁▁▁▁
count_agencies 0 1 1.25 0.60 1 1 1.0 1.0 6 ▇▁▁▁▁

Lets filter our data to include only vendors with over $500,000 in obligations.

tbl_vendors <- tbl_vendors %>% filter(amount_contracts > 500000)

Now lets take a look at the top vendors visually.

gg_top_vendors_pyro <-
  tbl_vendors %>%
  filter(amount_contracts > 0) %>%
  mutate(
    name_vendor_lumped = name_vendor %>% fct_lump(10, w = amount_contracts, other_level = "ALL OTHER VENDORS")
  ) %>%
  count(name_vendor_lumped, wt = amount_contracts, name = "amount") %>%
  mutate(name_vendor_lumped = fct_reorder(name_vendor_lumped, amount)) %>%
  ggplot(aes(x = name_vendor_lumped, y = amount)) +
  geom_bar(stat = "identity", fill = "#B71212") +
  coord_flip() +
  theme_ipsum() +
  theme(
    panel.grid.minor.y = element_blank(),
    panel.grid.major.y = element_blank(),
    legend.position = "none"
  ) +
  xlab("") +
  labs(title = "Top Explosive Vendors") +
  scale_y_continuous(labels = scales::dollar, n.breaks = 10)
gg_top_vendors_pyro

Lets try to figure out how many clusters exist. We should transform the x and y axis to make it easier to process visually.

Lets try 4 clusters

mplot3.xy(
  log(tbl_vendors$count_actions),
  log(tbl_vendors$amount_contracts / 1000000),
  cluster = "PAM",
  cluster.params = list(k = 4),
  main = "Contracts Actions vs Contract $ by PAM Cluster Group",
  fit = "lm",
  xlab = "Contract Actions",
  ylab = "Contract $ (millions)",
  theme = "white"
)

Looks Good!

Now lets take a look at a normalzied view of all of the key vendor features.

tbl_vendors %>%
  select(-c(id_duns)) %>%
  select_if(is.numeric) %>%
  preprocess(scale = T) %>%
  mplot3.x(group.title = "Normalized Numeric Feature Distributions [Explosive Vendors - Over $500k in Obligations]",
           theme = 'white',
           density.line = T)

[2020-11-05 16:15:03 preprocess] Scaling 5 numeric features… [2020-11-05 16:15:03 preprocess] Done

Deepdive: Dimension Reduction Award Offices

First we need to build an input table with the data on the dimensions to be reduced

tbl_pyro_office <- 
  tbl_pyro %>%
  group_by(name_office_award) %>%
  summarise(
    department = name_department_award[which.max(amount_obligation)],
    agency = name_agency_cgac_award[which.max(amount_obligation)],
    year_first = year(date_obligation) %>% min(),
    date_recent_award = year(date_obligation) %>% max(),
    count_actions = n(),
    count_contracts = n_distinct(id_contract_analysis, na.rm = T),
    amount_contracts = sum(amount_obligation),
    count_distinct_vendors = n_distinct(id_duns, na.rm = T),
    count_distinct_parents = n_distinct(id_duns_parent, na.rm = T),
    .groups = "drop"
  ) %>%
  arrange(desc(amount_contracts))

tbl_pyro_office <-
  tbl_pyro_office %>%
  mutate(agency_lumped = fct_lump(agency, n = 6, other_level = "ALL OTHER AGENCIES"))
reactable(tbl_pyro_office, filterable = F,
    resizable = T,
    searchable = T,
    showPageSizeOptions = T,
    pageSizeOptions = c(5, 10, 20),
    sortable = T,
    )
tbl_umap <-
  tbl_pyro_office %>%
  select_if(is.numeric) %>%
  uwot::umap(n_neighbors = 14, metric = "manhattan") %>%
  as_tibble() %>% 
  setNames(c("umap_001", "umap_002"))


tbl_pyro_office <- 
  tbl_pyro_office %>% 
  bind_cols(
    tbl_umap
  )

Finally we can visualize the output.

x <-
  c(
    "Department",
    "Agency",
    "Office",
    "Contracts",
    "Vendors",
    "Amount $",
    "Actions",
    "umap 1",
    "umap 2"
  )

y <-
  sprintf(
    "{point.%s:.2f}",
    c(
      "department",
      "agency",
      "name_office_award",
      "count_actions",
      "count_distinct_vendors",
      "amount_contracts",
      "count_actions",
      "umap_001",
      "umap_002"
    )
  )

tltip <- tooltip_table(x, y)


hc_umap <-
  tbl_pyro_office %>%
  hchart(
    "scatter",
    hcaes(
      x = umap_001,
      y = umap_002,
      group = agency_lumped,
      name = name_office_award
    ),
    marker = list(radius = 1, symbol = 'circle')
  ) %>%
  hc_add_theme(hc_theme_darkunica()) %>%
  hc_xAxis(visible = F) %>%
  hc_yAxis(visible = F) %>%
  hc_title(text = "Pyrotechnic Office Dimension Reduction") %>%
  hc_tooltip(
    useHTML = TRUE,
    headerFormat = "{point.name}",
    pointFormat = tltip,
    table = T
  )
hc_umap

Part III: FPDS Atom

Next we will explore FPDS atom, this provides the full interface into all the data contained in FPDS going back to 1978.

This provides access to significantly more data but is extremely compute heavy and exponentially slower than fpds_csv

Function Base Paramaters

args(fpds_atom)

function (global_vendor_name = NA, vendor_name = NA, parent_vendor_name = NA, department_name = NA, award_type = NA, research = NA, vendor_duns_number = NA, parent_duns_number = NA, vendor_doing_business_as_name = NA, agency_name = NA, contracting_office_name = NA, contracting_agency_name = NA, principal_naics_code = NA, award_status = NA, subcontract_plan = NA, solicitation_procedure = NA, contract_type = NA, contract_type_description = NA, type_of_contract_pricing = NA, contract_id = NA, ref_idv_contract_id = NA, ref_idv_agency_id = NA, contracting_agency_id = NA, contracting_office_id = NA, funding_agency_id = NA, funding_office_id = NA, funding_office_name = NA, agency_code = NA, department_id = NA, last_mod_date = NA, last_modified_by = NA, award_completion_date = NA, created_date = NA, signed_date = NA, effective_date = NA, estimated_completion_date = NA, cancellation_date = NA, destroy_date = NA, final_invoice_paid_date = NA, funded_through_date = NA, last_modified_date = NA, physical_completion_date = NA, reveal_date = NA, solicitation_issue_date = NA, sys_last_modified_date = NA, vendor_registration_date = NA, vendor_renewal_date = NA, base_exercised_options_value = NA, current_contract_value = NA, dollars_obligated = NA, contract_value = NA, fee_range_lower_value = NA, fee_range_upper_value = NA, fixed_fee_value = NA, obligated_amount = NA, total_current_contract_value = NA, total_dollars_obligated = NA, total_non_government_value = NA, total_ultimate_contract_value = NA, ultimate_contract_value = NA, contract_fiscal_year = NA, created_by = NA, description_of_requirement = NA, reason_for_modification = NA, legislative_mandates = NA, local_area_set_aside = NA, socio_economic_indicators = NA, multiyear_contract = NA, national_interest_code = NA, product_or_service_code = NA, performance_district_code = NA, performance_country = NA, performance_state_name = NA, vendor_address_city = NA, vendor_congress_district_code = NA, vendor_address_country_code = NA, vendor_address_country_name = NA, vendor_address_state_code = NA, vendor_address_state_name = NA, vendor_address_zip_code = NA, extent_competed = NA, number_of_offers_received = NA, sort_item = “Signed Date”, use_future = F, show_progress = T, clean_address = T, clean_entity_column = T, sort_descending = T, parse_contracts = F, snake_names = F, keep_key_columns = F, exclude_bloat = F, unformat = T, return_message = T, …) NULL

Acquire All Procurements for Anduril

tbl_anduril <-
  fpds_atom(
    vendor_name = "ANDURIL",
    parse_contracts = T,
    snake_names = T
  )

Lets explore the data

glimpse(tbl_anduril)

Rows: 43 Columns: 334 $ id_contract_analysis … $ id_contract … $ date_obligation … $ id_duns … $ id_duns_parent … $ id_duns_analysis … $ id_contract_idv … $ name_contract … $ description_obligation … $ datetime_contract_modified … $ name_vendor … $ name_vendor_parent … $ address_street1vendor … $ cage_vendor … $ city_vendor … $ code_congressional_district_vendor … $ code_country_vendor … $ code_site_alternate_vendor_vendor … $ code_site_vendor_vendor … $ code_state_vendor … $ telephone_vendor … $ zipcode_vendor … $ name_state_vendor … $ code_consolidated_contract … $ date_contract_completion_current … $ date_contract_completion_ultimate … $ date_contract_effective … $ date_registration_sam … $ date_renewal_sam … $ datetime_transaction_approved … $ datetime_transaction_created … $ datetime_transaction_last_modified … $ type_consolidated_contract … $ amount_base_all_option … $ amount_base_all_option_total … $ amount_base_and_exercised_option … $ amount_base_and_exercised_option_total … $ amount_fee_paid_for_service … $ amount_obligation … $ amount_obligation_total … $ amount_estimated_total … $ amount_individual_order_limit … $ amount_non_governmental_dollars … $ id_agency_funding … $ name_agency_award … $ name_agency_award_idv … $ name_agency_funding … $ id_agency_award … $ id_agency_award_idv … $ code_inter_agency_contracting_authority … $ is_federal_government_agency … $ type_inter_agency_contracting_authority … $ name_office_award … $ name_office_funding … $ id_office_award … $ id_office_funding … $ code_contracting_officer_business_size_determination … $ type_contracting_officer_business_size_determination … $ id_naics … $ name_naics … $ code_country_origin_product_service … $ code_product_service … $ name_country_origin_product_service … $ name_product_service … $ count_actions … $ count_offers_received_idv … $ count_offers_received … $ url_fpds_contract_atom … $ code_action … $ code_additional_reporting_value … $ code_commercial_acquisition_procedures … $ code_congressional_district_performance … $ code_contract_bundling … $ code_contract_competition … $ code_cost_pricing_data … $ code_country_incorporation … $ code_country_performance … $ code_domestic_foreign_entity … $ code_epa_designated_product … $ code_evaluated_preference … $ code_humanitarian_peackeeping_operation … $ code_idv_set_aside … $ code_inherently_government_function … $ code_list_of_additional_reporting_values … $ code_manufacturing_organization … $ code_materials_supplies_articles_equipment … $ code_modification … $ code_modification_idv … $ code_multiple_or_single_idv … $ code_national_interest_action … $ code_offers_source … $ code_performance_based_service_contract … $ code_place_of_manufacture … $ code_recovered_materials … $ code_referenced_idv … $ code_research … $ code_set_aside_source … $ code_solicitation_procedures … $ code_state_incorporation … $ code_state_performance … $ code_status_transaction … $ code_sub_contract_plan … $ code_undefinitized_action … $ generic_string06 … $ has_a76fair_act_action … $ has_clinger_cohen_act_planning … $ has_construction_wage_requirements … $ has_contracts … $ has_contracts_and_grants … $ has_cost_or_pricing_data … $ has_credit_card_purchase_method … $ has_gfe_or_gfp … $ has_grants … $ is1862land_grant_college … $ is1890land_grant_college … $ is1994land_grant_college … $ is_airport_authority … $ is_alaskan_native_owned_corporation_or_firm … $ is_alaskan_native_servicing_institution … $ is_american_indian_owned … $ is_asian_pacific_american_owned_business … $ is_black_american_owned_business … $ is_city_local_government … $ is_commercial_item_test_program … $ is_community_developed_corporation_owned … $ is_community_developed_corporation_owned_firm … $ is_community_development_corporation … $ is_corporate_entity_not_tax_exempt … $ is_corporate_entity_tax_exempt … $ is_council_of_governments … $ is_county_local_government … $ is_domestic_shelter … $ is_dot_certified_disadvantaged_business_enterprise … $ is_economically_disadvantaged_women_owned_small_business … $ is_educational_institution … $ is_federal_government … $ is_federally_funded_research_and_development_corp … $ is_foreign_government … $ is_foreign_owned_and_located … $ is_for_profit_organization … $ is_foundation … $ is_hispanic_american_owned_business … $ is_hispanic_servicing_institution … $ is_historically_black_college_or_university … $ is_hospital … $ is_housing_authorities_public_or_tribal … $ is_indian_tribe_owned … $ is_inter_municipal_local_government … $ is_international_organization … $ is_interstate_entity … $ is_joint_venture_economically_disadvantaged_women_owned_small_business … $ is_joint_venture_women_owned_small_business … $ is_labor_surplus_area_firm … $ is_limited_liability_corporation … $ is_local_area_set_aside … $ is_local_government … $ is_local_government_owned … $ is_manufacturer_of_goods … $ is_minority_institution … $ is_minority_owned … $ is_multi_year_contract … $ is_municipality_local_government … $ is_native_american_owned_business … $ is_native_hawaiian_owned_organization_or_firm … $ is_native_hawaiian_servicing_institution … $ is_nonprofit_organization … $ is_other_minority_owned … $ is_other_not_for_profit_organization … $ is_partnership_or_limited_liability_partnership … $ is_planning_commission … $ is_port_authority … $ is_private_university_or_college … $ is_sba_certified8a_joint_venture … $ is_sba_certified8a_program_participant … $ is_sba_certified_hub_zone … $ is_sba_certified_small_disadvantaged_business … $ is_school_district_local_government … $ is_school_of_forestry … $ is_self_certified_hub_zone_joint_venture … $ is_self_certified_small_disadvantaged_business … $ is_service_related_disabled_veteran_owned_business … $ is_sheltered_workshop … $ is_small_agricultural_cooperative … $ is_small_business … $ is_sole_propreitorship … $ is_state_controlled_institution_of_higher_learning … $ is_state_government … $ is_subchapter_s_corporation … $ is_sub_continent_asian_american_owned_business … $ is_township_local_government … $ is_transaction_closed … $ is_transit_authority … $ is_tribal_college … $ is_tribal_government … $ is_tribally_owned_firm … $ is_us_government_entity … $ is_very_small_business … $ is_veteran_owned … $ is_veterinary_college … $ is_veterinary_hospital … $ is_women_owned … $ is_women_owned_small_business … $ number_transaction … $ transaction_approved_by … $ transaction_created_by … $ transaction_last_modified_by … $ type_organization … $ zipcode_performance … $ name_country_incorporation … $ name_country_performance … $ name_state_incorporation … $ name_state_performance … $ type_a76fair_act_action … $ type_action … $ type_additional_reporting_value … $ type_clinger_cohen_act_planning … $ type_commercial_acquisition_procedures … $ type_commercial_item_test_program … $ type_construction_wage_requirements … $ type_contract_bundling … $ type_contract_competition … $ type_cost_or_pricing_data … $ type_cost_pricing_data … $ type_credit_card_purchase_method … $ type_domestic_foreign_entity … $ type_epa_designated_product … $ type_evaluated_preference … $ type_fbo_sourced … $ type_gfe_or_gfp … $ type_humanitarian_peackeeping_operation … $ type_idv_set_aside … $ type_inherently_government_function … $ type_labor_standards_clause … $ type_local_area_set_aside … $ type_manufacturing_organization … $ type_materials_supplies_articles_equipment … $ type_multiple_or_single_idv … $ type_multi_year_contract … $ type_national_interest_action … $ type_offers_source … $ type_performance_based_service_contract … $ type_place_of_manufacture … $ type_recovered_materials … $ type_referenced_idv … $ type_research … $ type_set_aside_source … $ type_solicitation_procedures … $ type_status_transaction … $ type_sub_contract_plan … $ type_undefinitized_action … $ code_contract_financing … $ code_contract_modification … $ code_cost_accounting_clause … $ code_equipment_system … $ code_not_compete_reason … $ code_program_claimant … $ code_set_aside … $ code_technology_item … $ pct_price_difference_price_evaluation … $ type_contract_financing … $ type_contract_modification … $ type_cost_accounting_clause … $ type_equipment_system … $ type_not_compete_reason … $ type_program_claimant … $ type_sea_transported … $ type_set_aside … $ type_technology_item … $ code_idc … $ code_idc_contract … $ code_who_can_use … $ id_solicitation … $ type_idc … $ type_idc_contract … $ code_non_traditional_government_contractor_participation … $ type_agreement … $ year_fiscal_contract … $ type_non_traditional_government_contractor_participation … $ url_fpds_atom … $ has_contract_parent … $ is_idv … $ location_vendor … $ city_state_vendor … $ location_performance … $ has_duns_parent … $ is_missing_duns … $ year_budget … $ id_department_award … $ name_department_award … $ code_department_award … $ id_department_funding … $ name_department_funding … $ code_department_funding … $ type_dod_award … $ type_contract_id_analysis … $ id_cgac_funding … $ name_agency_cgac_funding … $ slug_cgac_funding … $ id_cgac_award … $ name_agency_cgac_award … $ slug_cgac_award … $ id_cgac_agency_idv … $ name_cgac_agency_idv … $ slug_cgac_agency_idv … $ names_vendor_listed … $ count_vendors_listed … $ has_multiple_vendors … $ names_vendor_parent_listed … $ count_vendors_parent_listed … $ has_multiple_parent_vendors … $ is_possible_sbirsttr … $ is_filtered_sbir … $ has_labor_standards_clause … $ is_fbo_sourced … $ is_sea_transported … $ datetime_transaction_closed … $ code_fee_for_use_of_service … $ email_contract … $ url_website … $ type_fee_for_use_of_service … $ transaction_closed_by … $ slug_agency_funding … $ slug_office_funding … $ id_federal_account … $ code_account_omb … $ code_treasury_account_symbol … $ id_cgac … $ code_treasury_sub_account_symbol … $ id_initiative … $ name_initiative … $ id_federal_account_resolved … $ amount_obligation_allocated … $ has_federal_account … $ fax_vendor

skim(tbl_anduril)
Data summary
Name tbl_anduril
Number of rows 43
Number of columns 334
_______________________
Column type frequency:
character 184
Date 6
logical 111
numeric 28
POSIXct 5
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
id_contract_analysis 0 1.00 13 17 0 18 0
id_contract 0 1.00 13 17 0 18 0
id_contract_idv 38 0.12 13 17 0 2 0
name_contract 0 1.00 64 106 0 31 0
description_obligation 0 1.00 8 122 0 33 0
name_vendor 0 1.00 22 23 0 2 0
name_vendor_parent 0 1.00 22 23 0 2 0
address_street1vendor 0 1.00 17 24 0 3 0
cage_vendor 1 0.98 5 5 0 1 0
city_vendor 0 1.00 6 10 0 3 0
code_congressional_district_vendor 0 1.00 2 2 0 3 0
code_country_vendor 0 1.00 3 3 0 1 0
code_site_alternate_vendor_vendor 0 1.00 5 9 0 3 0
code_site_vendor_vendor 0 1.00 9 9 0 2 0
code_state_vendor 0 1.00 2 2 0 2 0
telephone_vendor 0 1.00 10 10 0 3 0
zipcode_vendor 0 1.00 5 5 0 3 0
name_state_vendor 0 1.00 8 10 0 2 0
code_consolidated_contract 10 0.77 1 1 0 2 0
type_consolidated_contract 10 0.77 2 16 0 2 0
id_agency_funding 4 0.91 4 4 0 5 0
name_agency_award 0 1.00 16 32 0 4 0
name_agency_award_idv 38 0.12 21 32 0 2 0
name_agency_funding 4 0.91 16 41 0 5 0
id_agency_award 0 1.00 4 4 0 4 0
id_agency_award_idv 38 0.12 4 4 0 2 0
code_inter_agency_contracting_authority 10 0.77 1 1 0 1 0
type_inter_agency_contracting_authority 10 0.77 14 14 0 1 0
name_office_award 0 1.00 9 32 0 12 0
name_office_funding 4 0.91 4 35 0 15 0
id_office_award 0 1.00 6 6 0 12 0
id_office_funding 4 0.91 6 6 0 15 0
code_contracting_officer_business_size_determination 10 0.77 1 1 0 2 0
type_contracting_officer_business_size_determination 10 0.77 14 25 0 2 0
name_naics 10 0.77 19 72 0 4 0
code_country_origin_product_service 13 0.70 3 3 0 1 0
code_product_service 0 1.00 4 4 0 10 0
name_country_origin_product_service 13 0.70 13 13 0 1 0
name_product_service 0 1.00 12 59 0 8 0
url_fpds_contract_atom 0 1.00 107 111 0 18 0
code_action 0 1.00 1 1 0 5 0
code_additional_reporting_value 11 0.74 1 4 0 2 0
code_commercial_acquisition_procedures 10 0.77 1 1 0 2 0
code_congressional_district_performance 3 0.93 2 2 0 8 0
code_contract_bundling 10 0.77 1 1 0 2 0
code_contract_competition 0 1.00 1 1 0 5 0
code_cost_pricing_data 10 0.77 1 1 0 2 0
code_country_incorporation 1 0.98 3 3 0 1 0
code_country_performance 3 0.93 3 3 0 1 0
code_domestic_foreign_entity 10 0.77 1 1 0 1 0
code_epa_designated_product 13 0.70 1 1 0 2 0
code_evaluated_preference 10 0.77 4 4 0 1 0
code_humanitarian_peackeeping_operation 10 0.77 1 1 0 1 0
code_idv_set_aside 38 0.12 3 4 0 2 0
code_inherently_government_function 11 0.74 2 2 0 1 0
code_list_of_additional_reporting_values 11 0.74 1 4 0 2 0
code_manufacturing_organization 10 0.77 1 1 0 1 0
code_materials_supplies_articles_equipment 10 0.77 1 1 0 2 0
code_modification 0 1.00 1 6 0 5 0
code_modification_idv 38 0.12 1 1 0 1 0
code_multiple_or_single_idv 38 0.12 1 1 0 1 0
code_national_interest_action 10 0.77 4 4 0 1 0
code_offers_source 11 0.74 1 1 0 2 0
code_performance_based_service_contract 10 0.77 1 1 0 3 0
code_place_of_manufacture 13 0.70 1 1 0 2 0
code_recovered_materials 10 0.77 1 1 0 1 0
code_referenced_idv 38 0.12 1 1 0 1 0
code_research 15 0.65 3 3 0 4 0
code_set_aside_source 11 0.74 1 1 0 2 0
code_solicitation_procedures 0 1.00 2 3 0 4 0
code_state_incorporation 1 0.98 2 2 0 1 0
code_state_performance 3 0.93 2 2 0 5 0
code_status_transaction 0 1.00 1 1 0 1 0
code_sub_contract_plan 10 0.77 1 1 0 1 0
code_undefinitized_action 10 0.77 1 1 0 1 0
generic_string06 40 0.07 1 1 0 1 0
transaction_approved_by 0 1.00 10 39 0 17 0
transaction_created_by 0 1.00 10 39 0 19 0
transaction_last_modified_by 0 1.00 9 39 0 19 0
type_organization 0 1.00 11 24 0 2 0
zipcode_performance 3 0.93 5 5 0 7 0
name_country_incorporation 1 0.98 13 13 0 1 0
name_country_performance 3 0.93 13 13 0 1 0
name_state_incorporation 1 0.98 8 8 0 1 0
name_state_performance 3 0.93 5 20 0 5 0
type_a76fair_act_action 10 0.77 2 2 0 1 0
type_action 0 1.00 3 27 0 6 0
type_additional_reporting_value 11 0.74 17 35 0 2 0
type_clinger_cohen_act_planning 10 0.77 2 3 0 2 0
type_commercial_acquisition_procedures 10 0.77 15 35 0 2 0
type_commercial_item_test_program 10 0.77 2 2 0 1 0
type_construction_wage_requirements 10 0.77 2 14 0 2 0
type_contract_bundling 10 0.77 11 25 0 2 0
type_contract_competition 0 1.00 8 52 0 5 0
type_cost_or_pricing_data 20 0.53 2 2 0 1 0
type_cost_pricing_data 10 0.77 15 16 0 2 0
type_credit_card_purchase_method 13 0.70 2 3 0 2 0
type_domestic_foreign_entity 10 0.77 14 14 0 1 0
type_epa_designated_product 13 0.70 12 18 0 2 0
type_evaluated_preference 10 0.77 18 18 0 1 0
type_fbo_sourced 10 0.77 2 14 0 3 0
type_gfe_or_gfp 10 0.77 32 32 0 1 0
type_humanitarian_peackeeping_operation 10 0.77 14 14 0 1 0
type_idv_set_aside 38 0.12 18 32 0 2 0
type_inherently_government_function 11 0.74 15 15 0 1 0
type_labor_standards_clause 10 0.77 2 14 0 2 0
type_local_area_set_aside 10 0.77 2 2 0 1 0
type_manufacturing_organization 10 0.77 19 19 0 1 0
type_materials_supplies_articles_equipment 10 0.77 2 14 0 2 0
type_multiple_or_single_idv 38 0.12 12 12 0 1 0
type_multi_year_contract 20 0.53 2 2 0 1 0
type_national_interest_action 10 0.77 4 4 0 1 0
type_offers_source 11 0.74 3 11 0 2 0
type_performance_based_service_contract 10 0.77 14 35 0 3 0
type_place_of_manufacture 13 0.70 11 30 0 2 0
type_recovered_materials 10 0.77 50 50 0 1 0
type_referenced_idv 38 0.12 3 3 0 1 0
type_research 15 0.65 52 59 0 4 0
type_set_aside_source 11 0.74 3 11 0 2 0
type_solicitation_procedures 0 1.00 15 25 0 4 0
type_status_transaction 0 1.00 5 5 0 1 0
type_sub_contract_plan 10 0.77 17 17 0 1 0
type_undefinitized_action 10 0.77 2 2 0 1 0
code_contract_financing 15 0.65 1 1 0 1 0
code_contract_modification 18 0.58 1 1 0 5 0
code_cost_accounting_clause 25 0.42 1 1 0 1 0
code_equipment_system 15 0.65 3 3 0 1 0
code_not_compete_reason 26 0.40 3 3 0 3 0
code_program_claimant 13 0.70 2 3 0 5 0
code_set_aside 15 0.65 3 4 0 2 0
code_technology_item 18 0.58 1 1 0 2 0
type_contract_financing 15 0.65 14 14 0 1 0
type_contract_modification 18 0.58 15 44 0 5 0
type_cost_accounting_clause 25 0.42 30 30 0 1 0
type_equipment_system 15 0.65 4 4 0 1 0
type_not_compete_reason 26 0.40 19 23 0 3 0
type_program_claimant 13 0.70 8 60 0 5 0
type_sea_transported 13 0.70 2 7 0 2 0
type_set_aside 15 0.65 18 32 0 2 0
type_technology_item 18 0.58 19 27 0 2 0
code_idc 40 0.07 1 1 0 1 0
code_idc_contract 40 0.07 1 1 0 1 0
code_who_can_use 40 0.07 13 14 0 2 0
id_solicitation 31 0.28 13 17 0 5 0
type_idc 40 0.07 12 12 0 1 0
type_idc_contract 40 0.07 41 41 0 1 0
code_non_traditional_government_contractor_participation 33 0.23 3 3 0 1 0
type_agreement 33 0.23 9 9 0 1 0
year_fiscal_contract 33 0.23 4 4 0 2 0
type_non_traditional_government_contractor_participation 33 0.23 40 40 0 1 0
url_fpds_atom 0 1.00 131 132 0 5 0
location_vendor 0 1.00 34 45 0 3 0
city_state_vendor 0 1.00 10 14 0 3 0
location_performance 3 0.93 8 8 0 7 0
id_department_award 0 1.00 4 4 0 2 0
name_department_award 0 1.00 21 31 0 2 0
id_department_funding 4 0.91 4 4 0 2 0
name_department_funding 4 0.91 21 31 0 2 0
type_dod_award 3 0.93 1 2 0 2 0
type_contract_id_analysis 13 0.70 8 8 0 1 0
name_agency_cgac_funding 4 0.91 21 31 0 4 0
slug_cgac_funding 4 0.91 3 3 0 4 0
name_agency_cgac_award 0 1.00 21 31 0 4 0
slug_cgac_award 0 1.00 3 3 0 4 0
name_cgac_agency_idv 38 0.12 21 31 0 2 0
slug_cgac_agency_idv 38 0.12 3 3 0 2 0
names_vendor_listed 0 1.00 22 23 0 2 0
names_vendor_parent_listed 0 1.00 22 23 0 2 0
code_fee_for_use_of_service 41 0.05 2 2 0 1 0
email_contract 41 0.05 23 23 0 1 0
url_website 41 0.05 20 20 0 1 0
type_fee_for_use_of_service 41 0.05 6 6 0 1 0
transaction_closed_by 42 0.02 12 12 0 1 0
slug_agency_funding 41 0.05 5 5 0 1 0
slug_office_funding 42 0.02 4 4 0 1 0
id_federal_account 35 0.19 8 8 0 4 0
code_account_omb 35 0.19 4 4 0 4 0
code_treasury_account_symbol 35 0.19 6 9 0 4 0
id_cgac 35 0.19 2 2 0 3 0
code_treasury_sub_account_symbol 41 0.05 3 3 0 2 0
id_initiative 42 0.02 4 4 0 1 0
name_initiative 42 0.02 38 38 0 1 0
id_federal_account_resolved 35 0.19 8 8 0 4 0
fax_vendor 42 0.02 10 10 0 1 0

Variable type: Date

skim_variable n_missing complete_rate min max median n_unique
date_obligation 0 1.00 2012-07-16 2020-09-02 2019-12-02 41
date_contract_completion_current 3 0.93 2012-11-01 2022-05-08 2020-08-19 23
date_contract_completion_ultimate 3 0.93 2012-11-01 2022-05-08 2020-12-30 23
date_contract_effective 0 1.00 2012-08-01 2020-09-02 2019-09-30 20
date_registration_sam 0 1.00 2002-08-28 2018-07-19 2018-07-19 2
date_renewal_sam 0 1.00 2013-01-11 2021-05-22 2020-08-26 6

Variable type: logical

skim_variable n_missing complete_rate mean count
is_federal_government_agency 0 1.00 0.00 FAL: 43
has_a76fair_act_action 10 0.77 0.00 FAL: 33
has_clinger_cohen_act_planning 10 0.77 0.03 FAL: 32, TRU: 1
has_construction_wage_requirements 38 0.12 0.00 FAL: 5
has_contracts 0 1.00 0.02 FAL: 42, TRU: 1
has_contracts_and_grants 0 1.00 0.98 TRU: 42, FAL: 1
has_cost_or_pricing_data 20 0.53 0.00 FAL: 23
has_credit_card_purchase_method 13 0.70 0.03 FAL: 29, TRU: 1
has_gfe_or_gfp 10 0.77 0.00 FAL: 33
has_grants 0 1.00 0.00 FAL: 43
is1862land_grant_college 0 1.00 0.00 FAL: 43
is1890land_grant_college 0 1.00 0.00 FAL: 43
is1994land_grant_college 0 1.00 0.00 FAL: 43
is_airport_authority 0 1.00 0.00 FAL: 43
is_alaskan_native_owned_corporation_or_firm 0 1.00 0.00 FAL: 43
is_alaskan_native_servicing_institution 0 1.00 0.00 FAL: 43
is_american_indian_owned 0 1.00 0.00 FAL: 43
is_asian_pacific_american_owned_business 0 1.00 0.00 FAL: 43
is_black_american_owned_business 0 1.00 0.00 FAL: 43
is_city_local_government 0 1.00 0.00 FAL: 43
is_commercial_item_test_program 10 0.77 0.00 FAL: 33
is_community_developed_corporation_owned 0 1.00 0.00 FAL: 43
is_community_developed_corporation_owned_firm 0 1.00 0.00 FAL: 43
is_community_development_corporation 0 1.00 0.00 FAL: 43
is_corporate_entity_not_tax_exempt 0 1.00 0.98 TRU: 42, FAL: 1
is_corporate_entity_tax_exempt 0 1.00 0.00 FAL: 43
is_council_of_governments 0 1.00 0.00 FAL: 43
is_county_local_government 0 1.00 0.00 FAL: 43
is_domestic_shelter 0 1.00 0.00 FAL: 43
is_dot_certified_disadvantaged_business_enterprise 0 1.00 0.00 FAL: 43
is_economically_disadvantaged_women_owned_small_business 0 1.00 0.00 FAL: 43
is_educational_institution 0 1.00 0.00 FAL: 43
is_federal_government 0 1.00 0.00 FAL: 43
is_federally_funded_research_and_development_corp 0 1.00 0.00 FAL: 43
is_foreign_government 0 1.00 0.00 FAL: 43
is_foreign_owned_and_located 0 1.00 0.00 FAL: 43
is_for_profit_organization 0 1.00 1.00 TRU: 43
is_foundation 0 1.00 0.00 FAL: 43
is_hispanic_american_owned_business 0 1.00 0.00 FAL: 43
is_hispanic_servicing_institution 0 1.00 0.00 FAL: 43
is_historically_black_college_or_university 0 1.00 0.00 FAL: 43
is_hospital 0 1.00 0.00 FAL: 43
is_housing_authorities_public_or_tribal 0 1.00 0.00 FAL: 43
is_indian_tribe_owned 0 1.00 0.00 FAL: 43
is_inter_municipal_local_government 0 1.00 0.00 FAL: 43
is_international_organization 0 1.00 0.00 FAL: 43
is_interstate_entity 0 1.00 0.00 FAL: 43
is_joint_venture_economically_disadvantaged_women_owned_small_business 0 1.00 0.00 FAL: 43
is_joint_venture_women_owned_small_business 0 1.00 0.00 FAL: 43
is_labor_surplus_area_firm 0 1.00 0.00 FAL: 43
is_limited_liability_corporation 0 1.00 0.00 FAL: 43
is_local_area_set_aside 10 0.77 0.00 FAL: 33
is_local_government 0 1.00 0.00 FAL: 43
is_local_government_owned 0 1.00 0.00 FAL: 43
is_manufacturer_of_goods 0 1.00 0.00 FAL: 43
is_minority_institution 0 1.00 0.00 FAL: 43
is_minority_owned 0 1.00 0.00 FAL: 43
is_multi_year_contract 20 0.53 0.00 FAL: 23
is_municipality_local_government 0 1.00 0.00 FAL: 43
is_native_american_owned_business 0 1.00 0.00 FAL: 43
is_native_hawaiian_owned_organization_or_firm 0 1.00 0.00 FAL: 43
is_native_hawaiian_servicing_institution 0 1.00 0.00 FAL: 43
is_nonprofit_organization 0 1.00 0.00 FAL: 43
is_other_minority_owned 0 1.00 0.00 FAL: 43
is_other_not_for_profit_organization 0 1.00 0.00 FAL: 43
is_partnership_or_limited_liability_partnership 0 1.00 0.02 FAL: 42, TRU: 1
is_planning_commission 0 1.00 0.00 FAL: 43
is_port_authority 0 1.00 0.00 FAL: 43
is_private_university_or_college 0 1.00 0.00 FAL: 43
is_sba_certified8a_joint_venture 0 1.00 0.00 FAL: 43
is_sba_certified8a_program_participant 0 1.00 0.00 FAL: 43
is_sba_certified_hub_zone 0 1.00 0.00 FAL: 43
is_sba_certified_small_disadvantaged_business 0 1.00 0.00 FAL: 43
is_school_district_local_government 0 1.00 0.00 FAL: 43
is_school_of_forestry 0 1.00 0.00 FAL: 43
is_self_certified_hub_zone_joint_venture 0 1.00 0.00 FAL: 43
is_self_certified_small_disadvantaged_business 0 1.00 0.00 FAL: 43
is_service_related_disabled_veteran_owned_business 0 1.00 0.00 FAL: 43
is_sheltered_workshop 0 1.00 0.00 FAL: 43
is_small_agricultural_cooperative 0 1.00 0.00 FAL: 43
is_small_business 0 1.00 0.00 FAL: 43
is_sole_propreitorship 0 1.00 0.00 FAL: 43
is_state_controlled_institution_of_higher_learning 0 1.00 0.00 FAL: 43
is_state_government 0 1.00 0.00 FAL: 43
is_subchapter_s_corporation 0 1.00 0.00 FAL: 43
is_sub_continent_asian_american_owned_business 0 1.00 0.00 FAL: 43
is_township_local_government 0 1.00 0.00 FAL: 43
is_transaction_closed 1 0.98 0.02 FAL: 41, TRU: 1
is_transit_authority 0 1.00 0.00 FAL: 43
is_tribal_college 0 1.00 0.00 FAL: 43
is_tribal_government 0 1.00 0.00 FAL: 43
is_tribally_owned_firm 0 1.00 0.00 FAL: 43
is_us_government_entity 0 1.00 0.00 FAL: 43
is_very_small_business 0 1.00 0.00 FAL: 43
is_veteran_owned 0 1.00 0.00 FAL: 43
is_veterinary_college 0 1.00 0.00 FAL: 43
is_veterinary_hospital 0 1.00 0.00 FAL: 43
is_women_owned 0 1.00 0.00 FAL: 43
is_women_owned_small_business 0 1.00 0.00 FAL: 43
has_contract_parent 0 1.00 0.00 FAL: 43
is_idv 13 0.70 0.17 FAL: 25, TRU: 5
has_duns_parent 0 1.00 1.00 TRU: 43
is_missing_duns 0 1.00 0.00 FAL: 43
has_multiple_vendors 0 1.00 0.00 FAL: 43
has_multiple_parent_vendors 0 1.00 0.00 FAL: 43
is_possible_sbirsttr 0 1.00 0.12 FAL: 38, TRU: 5
is_filtered_sbir 0 1.00 0.30 FAL: 30, TRU: 13
has_labor_standards_clause 41 0.05 0.00 FAL: 2
is_fbo_sourced 36 0.16 0.43 FAL: 4, TRU: 3
is_sea_transported 36 0.16 0.00 FAL: 7
has_federal_account 30 0.30 0.62 TRU: 8, FAL: 5

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
id_duns 0 1.00 81312039.93 3434071.08 80788349.00 80788349.0 80788349.00 80788349 103307059 ▇▁▁▁▁
id_duns_parent 0 1.00 81312039.93 3434071.08 80788349.00 80788349.0 80788349.00 80788349 103307059 ▇▁▁▁▁
id_duns_analysis 0 1.00 81312039.93 3434071.08 80788349.00 80788349.0 80788349.00 80788349 103307059 ▇▁▁▁▁
amount_base_all_option 0 1.00 13046444.27 40327934.45 0.00 0.0 50000.00 5000000 249550000 ▇▁▁▁▁
amount_base_all_option_total 0 1.00 25940392.83 48049514.72 16328.95 379631.3 12000000.00 18000000 249550000 ▇▁▁▁▁
amount_base_and_exercised_option 3 0.93 2960427.59 7031409.47 0.00 0.0 50000.00 3228750 35834763 ▇▁▁▁▁
amount_base_and_exercised_option_total 3 0.93 7083672.29 8375473.40 16328.95 261262.7 3797499.50 12900000 35834763 ▇▃▁▁▁
amount_fee_paid_for_service 13 0.70 0.00 0.00 0.00 0.0 0.00 0 0 ▁▁▇▁▁
amount_obligation 0 1.00 2456793.13 7187599.92 -11800000.00 0.0 29000.00 1875000 35834763 ▁▇▁▁▁
amount_obligation_total 0 1.00 5731904.48 7597275.82 0.00 180631.3 749999.95 11957500 35834763 ▇▅▁▁▁
amount_estimated_total 40 0.07 83350000.00 143933639.22 0.00 250000.0 500000.00 125025000 249550000 ▇▁▁▁▃
amount_individual_order_limit 40 0.07 83350000.00 143933639.22 0.00 250000.0 500000.00 125025000 249550000 ▇▁▁▁▃
amount_non_governmental_dollars 33 0.23 0.00 0.00 0.00 0.0 0.00 0 0 ▁▁▇▁▁
id_naics 10 0.77 542581.79 8090.56 511210.00 541715.0 541715.00 541715 561621 ▁▁▁▇▁
count_actions 13 0.70 1.00 0.00 1.00 1.0 1.00 1 1 ▁▁▇▁▁
count_offers_received_idv 38 0.12 1.00 0.00 1.00 1.0 1.00 1 1 ▁▁▇▁▁
count_offers_received 15 0.65 49.25 152.61 1.00 1.0 1.00 2 600 ▇▁▁▁▁
number_transaction 13 0.70 0.00 0.00 0.00 0.0 0.00 0 0 ▁▁▇▁▁
pct_price_difference_price_evaluation 26 0.40 0.00 0.00 0.00 0.0 0.00 0 0 ▁▁▇▁▁
year_budget 0 1.00 2019.42 1.26 2012.00 2019.0 2020.00 2020 2020 ▁▁▁▁▇
code_department_award 0 1.00 95.12 6.96 70.00 97.0 97.00 97 97 ▁▁▁▁▇
code_department_funding 4 0.91 94.92 7.29 70.00 97.0 97.00 97 97 ▁▁▁▁▇
id_cgac_funding 4 0.91 61.08 23.32 17.00 57.0 57.00 70 97 ▂▁▇▁▃
id_cgac_award 0 1.00 56.05 23.33 17.00 57.0 57.00 57 97 ▂▁▇▁▂
id_cgac_agency_idv 38 0.12 86.20 14.79 70.00 70.0 97.00 97 97 ▅▁▁▁▇
count_vendors_listed 0 1.00 1.00 0.00 1.00 1.0 1.00 1 1 ▁▁▇▁▁
count_vendors_parent_listed 0 1.00 1.00 0.00 1.00 1.0 1.00 1 1 ▁▁▇▁▁
amount_obligation_allocated 30 0.30 476737.82 1576611.13 0.00 0.0 16328.95 50000 5720000 ▇▁▁▁▁

Variable type: POSIXct

skim_variable n_missing complete_rate min max median n_unique
datetime_contract_modified 0 1.00 2012-07-27 15:20:59 2020-09-15 23:02:53 2020-01-31 16:59:11 42
datetime_transaction_approved 0 1.00 2012-07-27 15:20:59 2020-09-02 12:25:10 2019-12-18 12:08:07 43
datetime_transaction_created 0 1.00 2012-07-27 15:20:45 2020-08-06 14:16:05 2019-12-03 12:27:59 43
datetime_transaction_last_modified 0 1.00 2012-07-27 15:20:59 2020-09-15 23:02:53 2020-01-31 16:59:11 42
datetime_transaction_closed 42 0.02 2020-09-15 23:02:53 2020-09-15 23:02:53 2020-09-15 23:02:53 1

Lets remove excess features.

tbl_anduril <-
  tbl_anduril %>% preprocess(removeConstants = T) %>% as_tibble()

[2020-11-05 16:16:56 preprocess] Removing constant features… [2020-11-05 16:16:56 preprocess] Done

Lets make sure we have the Anduril we are looking for.

tbl_count <- 
  tbl_anduril %>%
  count(name_vendor, id_duns, name = "count_actions")
gt(tbl_count)
name_vendor id_duns count_actions
ANDURIL ENGINEERING LLC 103307059 1
ANDURIL INDUSTRIES INC 80788349 42

Looks like there is another vendor that isn’t who we are interested in. Lets exclude them.

tbl_anduril <-
  tbl_anduril %>%
  filter(id_duns != 103307059)

Anduril Awards by Agency and Year

gt_dept <- 
  tbl_anduril %>%
  count(name_agency_cgac_award,
        year_budget,
        wt = amount_obligation,
        name = "amount") %>%
  arrange(desc(amount)) %>% 
  munge_data() %>% 
  gt()
gt_dept
name_agency_cgac_award year_budget amount
DEPARTMENT OF HOMELAND SECURITY 2020 $60,731,513
DEPARTMENT OF THE AIR FORCE 2020 $25,715,000
DEPARTMENT OF DEFENSE 2020 $6,900,000
DEPARTMENT OF THE NAVY 2019 $5,981,263
DEPARTMENT OF DEFENSE 2019 $5,200,000
DEPARTMENT OF THE AIR FORCE 2019 $1,098,000
DEPARTMENT OF THE NAVY 2020 $0

Anduril Award Description Analysis

One of the biggest benefits of fpds_atom is that it gives access to procurement text descriptions. Lets take a look at them for Anduril and see what 2 sets of words show up together and visualize them in a network graph.

stop_words <- get_stopwords(source = "smart")

tidy_ngram <-
  tbl_anduril %>%
  select(id_contract_analysis,
         description_obligation,
         amount_obligation) %>%
  unnest_tokens(bigram,
                description_obligation,
                token = "ngrams",
                n = 2)

Next we can filter out stop words and build our bigram count table.

bigram_counts <-
  tidy_ngram %>%
  separate(bigram, c("word1", "word2"), sep = " ") %>%
  filter(!word1 %in% stop_words$word,
         !word2 %in% stop_words$word) %>%
  count(word1, word2, sort = TRUE)
gg_bigraph <-
  bigram_counts %>%
  filter(!is.na(word1)) %>%
  graph_from_data_frame() %>%
  ggraph(layout = "nicely") +
  geom_edge_link(
    aes(edge_alpha = n),
    show.legend = FALSE,
    arrow = arrow(length = unit(1.5, 'mm')),
    start_cap = circle(3, 'mm'),
    end_cap = circle(3, 'mm')
  ) +
  geom_node_text(aes(label = name)) +
  theme_graph() +
  labs(title = "Anduril FPDS Description Bigram")
gg_bigraph

Anduril Trigram Award Totals

Next lets take a look at which 3 words show up together and how many contracts this and obligations they correspond to.

First we build the ngram table

tbl_ngrams <-
  tbl_anduril %>%
  select(id_contract_analysis,
         description_obligation,
         amount_obligation) %>%
  unnest_tokens(ngram, description_obligation, token = "ngrams", n = 3)

Next we build the summary

 tbl_ngrams <- 
  tbl_ngrams %>%
  group_by(ngram) %>%
  summarise(
    amount = sum(amount_obligation),
    count_contracts = n_distinct(id_contract_analysis)
  ) %>%
  arrange(desc(amount)) %>% 
  munge_data()

Lets look at the top 15

tbl_ngrams %>% 
  slice(1:15) %>% 
  gt()
ngram amount count_contracts
AUTONOMOUS SURVEILLANCE TOWERS $60,731,513 3
SURVEILLANCE TOWERS DELIVERY $60,731,513 2
TOWERS DELIVERY ORDER $60,731,513 2
III AUTONOMOUS SURVEILLANCE $35,834,763 1
PHASE III AUTONOMOUS $35,834,763 1
SBIR PHASE III $35,834,763 1
USBP SBIR PHASE $35,834,763 1
BATTLE MANAGEMENT SYSTEM $28,715,000 3
SBIR AUTONOMOUS SURVEILLANCE $24,896,750 2
MANAGEMENT SYSTEM SENSING $24,800,000 1
SYSTEM SENSING NETWORK $24,800,000 1
ADVANCED BATTLE MANAGEMENT $16,915,000 3
FUND INCREASE STRATFI $12,800,000 1
INCREASE STRATFI ADD $12,800,000 1
NETWORK STRATEGIC FUND $12,800,000 1

Part IV: Analyzing Phase III SBIR/STTRs

This next section shows you how to take advantage of a special fpds_csv wrapper that acquires SBIR/STTR procurement.

Acquire All Phase III SBIR Procurements

df_all <-
  fpds_research_csv(research_codes = c("SR3", "ST3"),
                    snake_names = T)

For the sake of time lets use a cached version of the data I acquired yesterday.

df_all <- read_rda("data/phase_iii.rda")

Lets look at the data

glimpse(df_all)

Rows: 58,839 Columns: 51 $ code_research “SR3”, “SR3”, “SR3”, “SR3”, “SR3”, “S… $ id_contract ”MSFC0199208DNAS839394“,”MSFC0199305… $ date_obligation 1992-07-29, 1993-05-24, 1996-08-22, … $ id_contract_analysis “MSFC0199208DNAS839394”, “MSFC0199305… $ type_procurement ”AWARD“,”AWARD“,”AWARD“,”AWARD“,”… $ id_contract_idv NA, NA, NA, NA, NA, NA, NA, “N0017498… $ code_modification ”0“,”0“,”0“,”1“,”0“,”1“,”1“,”0… $ number_transaction 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0… $ type_award “DCA DEFINITIVE CONTRACT”, “DCA DEFIN… $ amount_obligation 1281447, 558195, 598535, 0, 1320000, … $ id_agency_award ”8000“,”8000“,”8000“,”8000“,”8000… $ name_agency_award “NATIONAL AERONAUTICS AND SPACE ADMIN… $ name_office_award ”MARSHALL SPACE FLIGHT CENTER“,”MARS… $ type_product_or_service “S”, “S”, “S”, “S”, “S”, “S”, “S”, “S… $ code_product_service ”AR91“,”C211“,”AR22“,”AR22“,”M182… $ id_naics NA, NA, NA, NA, NA, NA, NA, NA, NA, N… $ name_naics NA, NA, NA, NA, NA, NA, NA, NA, NA, N… $ id_duns 54910716, 153869896, 621970383, 62197… $ name_vendor “APPLIED RESEARCH INC”, “MAYFLOWER CO… $ city_vendor ”HUNTSVILLE“,”BURLINGTON“,”GREENVIL… $ code_state_vendor “AL”, “MA”, “IN”, “IN”, “AL”, “AL”, “… $ zipcode_vendor ”35806“,”01803“,”47124“,”47124“,”… $ id_duns_parent 54910716, 153869896, 621970383, 62197… $ name_vendor_parent “APPLIED RESEARCH INC”, “MAYFLOWER CO… $ url_csv https://www.fpds.gov/ezsearch/fpdspo… $ year_fiscal_obligation 1992, 1993, 1996, 1996, 1997, 1997, 1… $ has_duns_parent TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, T… $ is_missing_duns FALSE, FALSE, FALSE, FALSE, FALSE, FA… $ id_department_award “8000”, “8000”, “8000”, “8000”, “8000… $ name_department_award ”NATIONAL AERONAUTICS AND SPACE ADMIN… $ code_department_award 80, 80, 80, 80, 80, 80, 80, 97, 97, 8… $ type_contract_id_analysis “CONTRACT”, “CONTRACT”, “CONTRACT”, “… $ id_cgac_award 80, 80, 80, 80, 80, 80, 80, 17, 17, 8… $ name_agency_cgac_award ”NATIONAL AERONAUTICS AND SPACE ADMIN… $ slug_cgac_award “080”, “080”, “080”, “080”, “080”, “0… $ type_psc ”SERVICE“,”SERVICE“,”SERVICE“,”SER… $ name_product_service “R&D- SPACE: OTHER”, “ARCHITECT AND E… $ id_solicitation_group ”A“,”C“,”A“,”A“,”M“,”M“,”C“,”A… $ name_solicitation_group “RESEARCH AND DEVELOPMENT”, “ARCHITEC… $ has_parent FALSE, FALSE, FALSE, FALSE, FALSE, FA… $ is_idv FALSE, FALSE, FALSE, FALSE, FALSE, FA… $ code_additional_reporting NA, NA, NA, NA, NA, NA, NA, NA, NA, N… $ description_additonal_reporting NA, NA, NA, NA, NA, NA, NA, NA, NA, N… $ date_solicitation NA, NA, NA, NA, NA, NA, NA, NA, NA, … $ year_budget 1992, 1993, 1996, 1997, 1997, 1998, 1… $ names_vendor_listed ”APPLIED RESEARCH INC“,”MAYFLOWER CO… $ count_vendors_listed 1, 3, 2, 2, 4, 4, 3, 2, 2, 2, 1, 1, 2… $ has_multiple_vendors FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, … $ names_vendor_parent_listed “APPLIED RESEARCH INC”, "MAYFLOWER CO… $ count_vendors_parent_listed 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 1, 1… $ has_multiple_parent_vendors FALSE, FALSE, FALSE, FALSE, FALSE, FA…

skim(df_all)
Data summary
Name df_all
Number of rows 58839
Number of columns 51
_______________________
Column type frequency:
character 32
Date 2
logical 6
numeric 11
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
code_research 0 1.00 3 3 0 2 0
id_contract 0 1.00 5 22 0 15173 0
id_contract_analysis 0 1.00 8 22 0 15169 0
type_procurement 0 1.00 5 5 0 1 0
id_contract_idv 39193 0.33 8 20 0 1070 0
code_modification 0 1.00 1 7 0 466 0
type_award 0 1.00 17 35 0 9 0
id_agency_award 0 1.00 4 4 0 82 0
name_agency_award 2423 0.96 10 59 0 74 0
name_office_award 0 1.00 3 63 0 820 0
type_product_or_service 0 1.00 1 1 0 2 0
code_product_service 8 1.00 4 4 0 1119 0
name_naics 1197 0.98 7 118 0 684 0
name_vendor 0 1.00 3 77 0 3202 0
city_vendor 0 1.00 3 23 0 1157 0
code_state_vendor 489 0.99 2 20 0 81 0
zipcode_vendor 372 0.99 2 5 0 1806 0
name_vendor_parent 138 1.00 4 71 0 2651 0
url_csv 0 1.00 153 192 0 23 0
id_department_award 14 1.00 4 4 0 29 0
name_department_award 0 1.00 11 45 0 29 0
type_contract_id_analysis 0 1.00 3 8 0 2 0
name_agency_cgac_award 5 1.00 19 45 0 29 0
slug_cgac_award 5 1.00 3 3 0 29 0
type_psc 2575 0.96 7 7 0 2 0
name_product_service 2575 0.96 6 100 0 886 0
id_solicitation_group 8 1.00 1 2 0 102 0
name_solicitation_group 39 1.00 6 75 0 101 0
code_additional_reporting 53885 0.08 1 4 0 4 0
description_additonal_reporting 53885 0.08 17 82 0 4 0
names_vendor_listed 0 1.00 4 239 0 2749 0
names_vendor_parent_listed 138 1.00 4 215 0 2623 0

Variable type: Date

skim_variable n_missing complete_rate min max median n_unique
date_obligation 0 1.00 1992-07-29 2020-11-03 2012-08-24 5573
date_solicitation 56254 0.04 1989-05-28 2020-09-21 2019-02-19 495

Variable type: logical

skim_variable n_missing complete_rate mean count
has_duns_parent 0 1 1.00 TRU: 58839
is_missing_duns 0 1 0.00 FAL: 58839
has_parent 0 1 0.12 FAL: 51910, TRU: 6929
is_idv 0 1 0.33 FAL: 39193, TRU: 19646
has_multiple_vendors 0 1 0.65 TRU: 38346, FAL: 20493
has_multiple_parent_vendors 138 1 0.05 FAL: 55888, TRU: 2813

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
number_transaction 0 1.00 0.41 4.22 0 0 0 0 786 ▇▁▁▁▁
amount_obligation 0 1.00 331866.93 5522628.32 -663800111 0 9480 153165 669175315 ▁▁▇▁▁
id_naics 842 0.99 473493.90 117738.57 111110 339112 541330 541712 928120 ▁▂▇▁▁
id_duns 0 1.00 263947645.59 290446680.28 1030667 123456787 123456787 179041470 969907914 ▇▁▁▁▂
id_duns_parent 0 1.00 247246279.32 281229196.05 1024314 123456787 123456787 175966675 985015354 ▇▁▁▁▁
year_fiscal_obligation 0 1.00 2012.74 3.87 1992 2011 2012 2015 2020 ▁▁▁▇▃
code_department_award 14 1.00 92.62 14.73 11 97 97 97 97 ▁▁▁▁▇
id_cgac_award 0 1.00 32.93 23.77 11 19 21 47 97 ▇▁▂▁▁
year_budget 0 1.00 2012.94 3.87 1992 2011 2012 2015 2021 ▁▁▁▇▃
count_vendors_listed 0 1.00 2.22 1.04 1 1 2 3 7 ▇▇▁▁▁
count_vendors_parent_listed 138 1.00 1.09 0.57 1 1 1 1 9 ▇▁▁▁▁

Phase III Cumulative Spend

First thing to look at is the cumulative spend.

tbl_cumulative_sbir <-
  df_all %>%
  count(date_obligation, wt = amount_obligation, name = "amount") %>%
  mutate(amount_cumulative = cumsum(amount))

Next lets visualize it in an area chart.

gg_cumulative_spend <-
  tbl_cumulative_sbir %>%
  ggplot(aes(x = date_obligation, y = amount_cumulative)) +
  theme_minimal() +
  geom_line(color = "#00B0F0", size = .5) +
  geom_area(fill = "#00B0F0",
            alpha = 0.25,
            color = NA) +
  theme(
    panel.background = element_rect(fill = "#fffff8", color = NA),
    plot.background = element_rect(fill = "#fffff8", color = NA)
  ) +
  labs(
    x = NULL,
    y = "$ Obligated",
    title = "Cumulative Phase III STTR/SBIR Obligations",
    subtitle = "",
    caption = ""
  ) +
  scale_x_date(expand = c(0, 0),
               breaks = scales::pretty_breaks(n = 10)) +
  hrbrthemes::theme_ipsum_rc(
    grid = "XY",
    plot_title_size = 10,
    subtitle_size = 8.5,
    caption_size = 8.5,
    axis_text_size = 10,
    axis_title_size = 10,
    strip_text_size = 10
  ) +
  geom_smooth(
    colour = "#000000",
    method = 'loess',
    span = .3,
    size = .5,
    alpha = 0.45
  ) +
  geom_smooth(
    colour = "red",
    method = 'lm',
    size = .5,
    alpha = 0.45
  ) +
  scale_y_continuous(labels = scales::dollar,
                     breaks = scales::pretty_breaks(n = 10))
gg_cumulative_spend

R is a functional programming language. Lets take advantage of this by turning this area visualization code into a function that we will use later for something very cool.

plot_gg_cumulative_area <-
  function(data,
           plot_title = "",
           use_lm = T,
           use_loess = T)  {
    gg <- data %>%
      ggplot(aes(x = date_obligation, y = amount_cumulative)) +
      theme_minimal() +
      geom_line(color = "#00B0F0", size = .5) +
      geom_area(fill = "#00B0F0",
                alpha = 0.25,
                color = NA) +
      theme(
        panel.background = element_rect(fill = "#fffff8", color = NA),
        plot.background = element_rect(fill = "#fffff8", color = NA)
      ) +
      labs(
        x = NULL,
        y = "$ Obligated",
        title = plot_title,
        subtitle = "",
        caption = ""
      ) +
      scale_x_date(expand = c(0, 0),
                   breaks = scales::pretty_breaks(n = 10)) +
      hrbrthemes::theme_ipsum_rc(
        grid = "XY",
        plot_title_size = 7,
        subtitle_size = 6.5,
        caption_size = 6.5,
        axis_text_size = 7,
        axis_title_size = 7,
        strip_text_size = 7
      ) +
      scale_y_continuous(labels = scales::dollar,
                         breaks = scales::pretty_breaks(n = 10))
    
    if (use_loess) {
      gg <- gg  +
        geom_smooth(
          colour = "#000000",
          method = 'loess',
          span = .3,
          size = .5,
          alpha = 0.45
        )
    }
    
    if (use_lm) {
      gg <- gg +
        geom_smooth(
          colour = "red",
          method = 'lm',
          size = .5,
          alpha = 0.45
        )
    }
    
    gg
    
  }

Lets make sure the function worked

plot_gg_cumulative_area(
  data = tbl_cumulative_sbir,
  plot_title = "Does this work?",
  use_lm = T,
  use_loess = T
)

It worked.

Phase III Annual Spend by Type

Now lets look at the spend over time by year by SBIR/STTR type.

tbl_year <- df_all %>%
  group_by(code_research, year_fiscal_obligation) %>%
  summarise(amount = sum(amount_obligation),
            .groups = "drop")
tbl_year %>%
  munge_data() %>%
  arrange(year_fiscal_obligation) %>%
  gt()
code_research year_fiscal_obligation amount
SR3 1992 $1,281,447
SR3 1993 $558,195
SR3 1996 $598,535
SR3 1997 $53,408,396
SR3 1998 $38,843,446
SR3 1999 $54,803,009
SR3 2000 $85,876,330
ST3 2000 $40,000
SR3 2001 $167,675,515
ST3 2001 $11,798,586
SR3 2002 $304,914,144
ST3 2002 $9,388,649
SR3 2003 $351,887,605
ST3 2003 $10,736,465
SR3 2004 $431,014,329
ST3 2004 $18,307,634
SR3 2005 $407,159,674
ST3 2005 $7,580,577
SR3 2006 $418,706,806
ST3 2006 $6,755,257
SR3 2007 $489,983,812
ST3 2007 $40,517,187
SR3 2008 $534,127,115
ST3 2008 $26,067,470
SR3 2009 $575,769,185
ST3 2009 $31,857,902
SR3 2010 $765,843,775
ST3 2010 $101,683,227
SR3 2011 $1,863,224,728
ST3 2011 $78,128,988
SR3 2012 $1,828,034,443
ST3 2012 $115,500,189
SR3 2013 $1,247,732,290
ST3 2013 $86,455,753
SR3 2014 $1,007,429,995
ST3 2014 $80,262,242
SR3 2015 $900,644,712
ST3 2015 $42,969,009
SR3 2016 $1,104,644,307
ST3 2016 $48,888,230
SR3 2017 $1,279,910,551
ST3 2017 $54,705,216
SR3 2018 $1,426,320,340
ST3 2018 $43,866,462
SR3 2019 $1,670,207,143
ST3 2019 $145,051,727
SR3 2020 $1,423,578,905
ST3 2020 $131,978,671

Now we can visualize it.

hc_bar_year <- hchart(tbl_year,
                      "column",
                      hcaes(x = year_fiscal_obligation,
                            y = amount,
                            group = code_research)) %>%
  hc_add_theme(hc_theme_elementary())  %>%
  hc_title(text = "SBIR/STTR Phase III Awards by Year") %>% 
  hc_plotOptions(series = list(stacking = "normal"))
hc_bar_year

Phase III Department Spend Stream Graph

Next lets take a look at department spend over time.

Again we are going to want to lump a bunch of the low frequency agencies together.

We also will limit this analysis for SBIR/STTRs since 2000

tbl_by_dept <-
  df_all %>%
  count(year_budget,
        name_department_award,
        wt = amount_obligation,
        name = "amount") %>%
  filter(amount > 0, year_budget >= 2000) %>%
  mutate(
    department_group = fct_lump(
      name_department_award,
      n = 5,
      w = amount,
      other_level = "ALL OTHER DEPARTMENTS"
    )
  ) %>%
  count(year_budget, department_group, wt = amount, name = "amount") %>%
  mutate(department_group = fct_reorder(department_group, -amount))

Now we can build an interactive streamgraph with this information.

hc_dept_stream_graph <-
  tbl_by_dept %>%
  hchart("streamgraph",
         hcaes(year_budget, amount, group = department_group)) %>%
  hc_yAxis(visible = T,
           startOnTick = FALSE,
           endOnTick = FALSE) %>%
  hc_title(text = "SBIR Phase III by Department")
hc_dept_stream_graph

Exploring SBIR/STTR Phase III Award by Agency

Lets explore this data by agency.

First lets see the raw breakdown by agency.

tbl_agency_amt <-
  df_all %>%
  filter(!is.na(name_agency_cgac_award)) %>%
  count(name_agency_cgac_award,
        wt = amount_obligation,
        name = "amount",
        sort = T)
tbl_agency_amt %>%
  munge_data() %>%
  gt()
name_agency_cgac_award amount
DEPARTMENT OF THE NAVY $9,069,719,016
DEPARTMENT OF THE ARMY $4,462,824,702
DEPARTMENT OF THE AIR FORCE $3,542,156,690
DEPARTMENT OF DEFENSE $772,759,745
NATIONAL AERONAUTICS AND SPACE ADMINISTRATION $611,909,854
GENERAL SERVICES ADMINISTRATION $577,131,261
DEPARTMENT OF HOMELAND SECURITY $170,250,961
AGENCY FOR INTERNATIONAL DEVELOPMENT $108,824,103
DEPARTMENT OF THE INTERIOR $54,903,798
DEPARTMENT OF VETERANS AFFAIRS $33,487,142
DEPARTMENT OF THE TREASURY $20,680,291
DEPARTMENT OF HEALTH AND HUMAN SERVICES $20,429,781
DEPARTMENT OF COMMERCE $20,373,748
DEPARTMENT OF ENERGY $19,193,713
DEPARTMENT OF AGRICULTURE $12,107,410
DEPARTMENT OF TRANSPORTATION $12,093,171
DEPARTMENT OF STATE $11,949,069
DEPARTMENT OF JUSTICE $5,433,162
NATIONAL SCIENCE FOUNDATION $250,000
DEPARTMENT OF LABOR $92,101
SECURITIES AND EXCHANGE COMMISSION $26,412
EQUAL EMPLOYMENT OPPORTUNITY COMMISSION $26,307
SMITHSONIAN INSTITUTION $18,744
DEPARTMENT OF HOUSING AND URBAN DEVELOPMENT $15,005
EXECUTIVE OFFICE OF THE PRESIDENT $12,398
SELECTIVE SERVICE SYSTEM $10,617
SOCIAL SECURITY ADMINISTRATION $10,351
ENVIRONMENTAL PROTECTION AGENCY $7,047
DEPARTMENT OF EDUCATION $4,598

Next lets lump together low frequency agencies and visualize this data.

gg_agency <-
  tbl_agency_amt %>%
  mutate(
    agency_group = fct_lump(
      name_agency_cgac_award,
      n = 15,
      w = amount,
      other_level = "ALL OTHER AGENCIES"
    )
  ) %>%
  count(agency_group,
        wt = amount,
        sort = T,
        name = "amount") %>%
  mutate(agency_group = fct_reorder(agency_group, amount)) %>%
  ggplot(aes(x = agency_group, y = amount)) +
  geom_bar(
    stat = "identity",
    fill = "#f68060",
    alpha = .6,
    width = .4
  ) +
  coord_flip() +
  xlab("") +
  scale_y_continuous(labels = scales::dollar, n.breaks = 5) +
  geom_text(
    aes(label = round(amount / 1000000, digits = 2)),
    position = position_dodge(width = 0.9),
    vjust = -0.25,
    check_overlap = T
  ) +
  ggtitle("Top SBIR/STTR Phase III Awarding Agencies") +
  labs(subtitle = "By Lumped Government-wide Accounting Agency [CGAC] - Top 15",
       x = "") +
  theme_ipsum()
gg_agency

Lets make this look better

gg_agency <-
  gg_agency +
  theme(
    panel.grid.minor.y = element_blank(),
    panel.grid.major.y = element_blank(),
    legend.position = "none",
    text = element_text(size = 8,  family = "serif")
  ) +
  scale_y_sqrt(labels = scales::dollar, n.breaks = 4)
gg_agency

Explore Every SBIR/STTR Awarding Office

Lets take a look at all the awarding offices of SBIR Phase III and put it into an interactive table.

tbl_office <-
  df_all %>%
  filter(!is.na(name_office_award)) %>%
  group_by(name_department_award,
           name_agency_cgac_award,
           name_office_award) %>%
  summarise(
    date_first = min(date_obligation, na.rm = T),
    date_recent = max(date_obligation, na.rm = T),
    distinct_vendors = n_distinct(id_duns, na.rm = T),
    amount = sum(amount_obligation, na.rm = T),
    amount_mean = mean(amount_obligation),
    actions = n(),
    contracts = n_distinct(id_contract_analysis, na.rm = T),
    .groups = "drop"
  ) %>%
  filter(amount > 0) %>%
  arrange(desc(amount))
table_office <- tbl_office %>%
  reactable(
    filterable = F,
    resizable = T,
    searchable = T,
    showPageSizeOptions = T,
    pageSizeOptions = c(5, 10, 20),
    sortable = T,
    compact = T,
    columns = list(
      date_first = colDef(name = "First Award"),
      date_recent = colDef(name = "Recent Award"),
      name_department_award = colDef(
        name = "Department",
        sortable = T,
        filterable = T
      ),
      name_agency_cgac_award = colDef(
        name = "Agency",
        sortable = T,
        filterable = T
      ),
      name_office_award = colDef(
        name = "Office",
        sortable = T,
        filterable = T
      ),
      distinct_vendors = colDef(name = "Unique Vendors"),
      amount = colDef(
        name = "$ Total",
        format = colFormat(
          prefix = "$",
          separators = TRUE,
          digits = 0
        )
      ),
      amount_mean = colDef(
        name = "$ Mean",
        format = colFormat(
          prefix = "$",
          separators = TRUE,
          digits = 0
        )
      ),
      contracts = colDef(name = "Unique Contracts"),
      actions = colDef(name = "Contract Actions")
    )
  )
table_office

First Time Phase III Vendor Analysis.

First thing is to build a table that shows us first time vendors. We have to exclude 1 special DUNS number, inquisitive minds should feel free to explore it on their own time.

tbl_first <-
  df_all %>%
  filter(id_duns != 123456787) %>%
  group_by(id_duns) %>%
  filter(date_obligation == min(date_obligation)) %>%
  filter(amount_obligation > 0) %>%
  slice(1) %>%
  ungroup() %>%
  select(
    id_duns,
    year_budget_first = year_budget,
    date_first_phase_3 = date_obligation,
    id_contract_analysis_first = id_contract_analysis,
    department_first = name_department_award,
    agency_first = name_agency_cgac_award,
    office_first = name_office_award
  ) %>%
  arrange(desc(year_budget_first))

This gives us information on 2,450 Phase III awardees.

Now lets look at the raw counts over time between fisca year 1998 and 2020.

gg_first_time <- 
  tbl_first %>%
  count(year_budget_first) %>%
  filter(year_budget_first >= 1998, year_budget_first <= 2020) %>%
  ggplot(aes(year_budget_first, n)) +
  geom_line() +
  theme_ipsum() +
  labs(title = "New Phase III SBIR/STTR Recipients",
       x = "Budget Year",
       y = "Count")
gg_first_time

Agency Attribution of New Phase III Companies

Next lets take it a step further and take a look at an attribution by year of which agencies were responsible for the new vendors.

colors <-
  c("#00008BFF",
    "#1EFAA0FF",
    "#00FA00FF",
    "#ADFF2FFF",
    "#FA7D00FF",
    "#78005AFF")

hc_attribution <- 
  tbl_first %>%
  count(year_budget_first, agency_first) %>%
  filter(year_budget_first >= 1998, year_budget_first <= 2020) %>%
  mutate(agency_lumped = fct_lump(agency_first, n = 5, other_level = "ALL OTHER AGENCIES")) %>%
  count(year_budget_first, agency_lumped, wt = n) %>%
  mutate(agency_lumped = fct_reorder(agency_lumped, n)) %>%
  filter(!is.na(agency_lumped)) %>%
  hchart(
    "column",
    hcaes(year_budget_first, n, group = agency_lumped),
    stacking = "percent",
    borderWidth = 0,
    groupPadding = 0,
    pointPadding  = 0,
  ) %>%
  hc_colors(colors) %>%
  hc_add_theme(hc_theme_538()) %>%
  hc_xAxis(gridLineWidth = 0,
           title = list(text = NULL)) %>%
  hc_tooltip(
    table = TRUE,
    outside = TRUE,
    shared = TRUE,
    useHTML = TRUE,
    headerFormat = "<small>{point.key}</small><table>",
    pointFormat = str_c(
      "<tr><td style=\"color: {series.color}\">{series.name}: </td>",
      "<td style=\"text-align: right\"><b>{point.y:0.f}</b></td>"
    ),
    footerFormat = "<tr><td><b>Total</b>: </td><td style=\"text-align: right\"><b>{point.total:0.f}</b></td></tr></table>",
    style = list(fontSize = "0.7em")
  ) %>%
  hc_legend(
    verticalAlign = "top",
    align = "left",
    itemStyle =  list(fontWeight = 500)
  ) %>%
  hc_title(text = "Share of New Phase III Awardees by Agency Group") %>%
  hc_plotOptions(series = list(
    marker = list(
      radius = 0,
      enabled = FALSE,
      symbol = "circle"
    ),
    states = list(hover = list(halo = list(size = 0)))
  )) %>%
  hc_yAxis(title = list(text = "Share of New Vendors"),
           type = "percent")
hc_attribution

Deep Dive: Phase III Awardee Exploration Trelliscope

Now lets dig into the Phase III awardees and build an interactive trelliscope to explore the data!

First step is to build a vendor summary data frame

tbl_vendors <-
  df_all %>%
  filter(!is.na(id_duns)) %>%
  group_by(id_duns) %>%
  summarise(
    vendor = name_vendor[which.max(amount_obligation)],
    date_first = min(date_obligation, na.rm = T),
    date_recent = max(date_obligation, na.rm = T),
    count_types = n_distinct(code_research, na.rm = T),
    count_agencies = n_distinct(id_cgac_award, na.rm = T),
    count_departments = n_distinct(id_department_award),
    count_offices = n_distinct(name_office_award, na.rm = T),
    amount = sum(amount_obligation, na.rm = T),
    amount_mean = mean(amount_obligation),
    actions = n(),
    contracts = n_distinct(id_contract_analysis, na.rm = T),
    .groups = "drop"
  ) %>%
  filter(amount > 0) %>%
  arrange(desc(amount)) %>%
  mutate(
    year_recent = year(date_recent),
    year_first = year(date_first),
    .before = "date_first"
  )

Here are the top 10

tbl_vendors %>%
  slice(1:10) %>%
  munge_data() %>%
  gt()
id_duns vendor year_recent year_first date_first date_recent count_types count_agencies count_departments count_offices amount amount_mean actions contracts
123456787 MISCELLANEOUS FOREIGN CONTRACTORS 2020 2006 2006-08-04 2020-09-25 2 5 5 88 $2,323,610,293 $104,502 22235 8491
945837219 PROGENY SYSTEMS CORPORATION 2020 2000 2000-03-10 2020-07-30 2 4 2 22 $1,055,297,830 $969,052 1089 90
95275343 NAVMAR APPLIED SCIENCES CORPORATION 2020 1999 1999-05-10 2020-07-30 2 2 1 3 $931,016,698 $1,313,141 709 26
122809887 ALLIANT TECHSYSTEMS OPERATIONS LLC 2020 2007 2007-01-04 2020-07-22 2 2 1 4 $690,088,178 $5,949,036 116 8
153927827 FRONTIER TECHNOLOGY INC. 2020 2009 2009-09-22 2020-10-19 2 5 2 16 $459,077,093 $684,169 671 61
53885604 TECHNOLOGY SERVICE CORPORATION 2020 2003 2003-05-27 2020-07-13 1 4 1 9 $440,110,501 $2,404,975 183 14
153245857 DIGITAL SYSTEM RESOURCES, INC 2016 1998 1998-08-27 2016-06-22 1 2 1 11 $318,833,971 $1,423,366 224 14
151305088 GATR TECHNOLOGIES, INC. 2020 2008 2008-08-22 2020-08-03 1 3 1 7 $314,651,825 $568,991 553 27
140785929 3 PHOENIX, INC. 2019 2006 2006-07-10 2019-12-13 2 2 1 4 $308,735,010 $1,147,714 269 12
803423235 CHESAPEAKE SCIENCES CORPORATION 2020 2000 2000-06-02 2020-07-20 2 2 1 9 $307,756,522 $506,178 608 30

Ahh there is that interesting DUNS number again. Lets get rid of it.

tbl_vendors <-
  tbl_vendors %>%
  filter(id_duns != 123456787)

Lets explore the data interactively [this will not work in the rendered markdown]

esquisse::esquisser(tbl_vendors)

Lets filter out some of the the small awardees by ensuring the companies have at-least 2 contracts and over $500,000 in awards

tbl_vendors <-
  tbl_vendors %>%
  filter(contracts >= 2, amount >= 500000)

Lets try to figure out how many clusters there are.

mplot3.xy(
  log(tbl_vendors$actions),
  log(tbl_vendors$amount / 1000000),
  cluster = "PAM",
  cluster.params = list(k = 10),
  main = "Phase III $ by Actions",
  fit = "lm",
  xlab = "Contract Actions",
  ylab = "Contract $ (millions)",
  theme = "white"
)

10 looks good.

Next we can add the cluster to our summary data.

clusters <- 
  tbl_vendors %>% select(-id_duns) %>%
  select_if(is.numeric) %>%
  rtemis::u.PAM(k = 10)

[2020-11-05 16:17:42 rtemis::u.PAM] Hello, alexbresler [2020-11-05 16:17:43 rtemis::u.PAM] Partitioning Around Medoids with k = 10… C pam(): computing 228151 dissimilarities from 676 x 10 matrix: [Ok] pam()’s bswap(*, s=1.05479e+09, pamonce=0): build 10 medoids: new repr. 338 new repr. 9 new repr. 53 new repr. 2 new repr. 129 new repr. 21 new repr. 545 new repr. 217 new repr. 15 new repr. 4 after build: medoids are 2 4 9 15 21 53 129 217 338 545 and min.dist dysma[1:n] are 1.24e+08 0 2.31e+08 0 1.9e+07 1.11e+07 6.9e+06 1.17e+06 0 3.32e+07 4.15e+07 3.3e+07 2.11e+07 2.04e+07 0 1.99e+06 2.11e+07 2.06e+07 1.72e+07 1.27e+07 0 3.6e+06 8.29e+06 1.02e+07 1.32e+07 1.74e+07 2.63e+07 2.7e+07 3.6e+07 3.61e+07 3.16e+07 3.02e+07 2.95e+07 2.92e+07 2.03e+07 1.82e+07 1.68e+07 1.58e+07 1.58e+07 1.42e+07 1.19e+07 1.11e+07 1.01e+07 9.5e+06 9.28e+06 8.8e+06 8.32e+06 7.68e+06 3.33e+06 1.62e+06 1.02e+06 2.7e+05 0 6.51e+05 1.17e+06 1.21e+06 1.51e+06 2.89e+06 3.66e+06 4.09e+06 5.78e+06 7.74e+06 8.54e+06 9.28e+06 1.07e+07 1.08e+07 1.13e+07 1.16e+07 1.21e+07 1.47e+07 1.52e+07 1.5e+07 1.49e+07 1.41e+07 1.31e+07 1.19e+07 1.09e+07 1.07e+07 1.02e+07 9.58e+06 9.57e+06 9.43e+06 9.35e+06 9.13e+06 9.02e+06 8.95e+06 8.86e+06 8.16e+06 7.21e+06 6.8e+06 6.46e+06 6.37e+06 6.06e+06 5.71e+06 5.67e+06 5.04e+06 4.91e+06 4.64e+06 4.6e+06 4.33e+06 4.71e+06 4.03e+06 3.99e+06 3.28e+06 3.24e+06 3.06e+06 2.85e+06 2.48e+06 2.34e+06 2.31e+06 2.28e+06 2.24e+06 2.19e+06 1.96e+06 1.74e+06 1.77e+06 1.62e+06 1.55e+06 1.29e+06 1.11e+06 1.06e+06 9.78e+05 9.51e+05 9.93e+05 8.31e+05 6.99e+05 6.49e+05 2.93e+05 0 6.22e+05 6.67e+05 1.12e+06 1.14e+06 1.14e+06 1.53e+06 1.59e+06 1.6e+06 1.83e+06 1.97e+06 2.22e+06 2.32e+06 2.4e+06 2.61e+06 2.82e+06 2.93e+06 3.28e+06 3.19e+06 3.32e+06 3.34e+06 3.42e+06 3.48e+06 3.59e+06 3.83e+06 4.02e+06 4.82e+06 4.26e+06 4.57e+06 4.6e+06 4.8e+06 4.94e+06 5.16e+06 5.18e+06 5.31e+06 5.33e+06 5.55e+06 5.61e+06 5.62e+06 5.53e+06 5.49e+06 5.48e+06 5.31e+06 5.14e+06 4.94e+06 4.65e+06 4.38e+06 4.2e+06 4.22e+06 4.18e+06 4.03e+06 3.9e+06 3.58e+06 3.98e+06 3.32e+06 3.23e+06 3.74e+06 2.84e+06 2.74e+06 2.69e+06 3.99e+06 2.64e+06 2.55e+06 4.78e+06 2.48e+06 2.34e+06 2.05e+06 1.4e+06 3.95e+06 1.36e+06 1.2e+06 1.15e+06 1.09e+06 1.05e+06 9.79e+05 9.79e+05 9.35e+05 9.36e+05 1.25e+06 6.99e+05 8.17e+05 6.07e+05 5.7e+05 7.9e+05 4.67e+05 3.53e+05 1.33e+05 4.08e+05 0 2.49e+05 1.77e+06 3.91e+05 1.45e+05 4.57e+05 1.75e+06 5.18e+05 5.7e+05 5.42e+05 1.21e+06 6.47e+05 7.16e+05 7.43e+05 7.63e+05 8.42e+05 9.26e+05 9.4e+05 9.92e+05 1.13e+06 1.09e+06 1.37e+06 1.19e+06 3.07e+06 1.23e+06 1.21e+06 1.29e+06 1.34e+06 1.37e+06 1.8e+06 1.52e+06 1.52e+06 1.58e+06 1.56e+06 1.59e+06 1.6e+06 1.64e+06 1.66e+06 2.01e+06 1.81e+06 1.84e+06 1.9e+06 2.02e+06 2.05e+06 2.22e+06 2.31e+06 2.53e+06 2.42e+06 2.44e+06 2.46e+06 2.45e+06 2.58e+06 2.76e+06 2.68e+06 2.7e+06 2.83e+06 2.67e+06 2.66e+06 2.62e+06 2.54e+06 2.5e+06 2.5e+06 2.46e+06 2.53e+06 2.37e+06 2.3e+06 2.33e+06 2.21e+06 2.1e+06 2.11e+06 2.06e+06 1.96e+06 1.95e+06 1.92e+06 1.91e+06 1.77e+06 1.7e+06 1.61e+06 1.6e+06 1.6e+06 1.62e+06 1.53e+06 2.44e+06 1.53e+06 1.52e+06 1.52e+06 1.42e+06 1.44e+06 1.32e+06 1.57e+06 1.14e+06 1.12e+06 1.08e+06 1.06e+06 9.14e+05 8.55e+05 1.44e+06 2.77e+06 7.74e+05 6.87e+05 6.56e+05 7.07e+05 8.63e+05 7.52e+05 5.14e+05 4.9e+05 5.27e+05 4.56e+05 4.53e+05 8.65e+05 5.75e+05 4.85e+05 3.39e+05 3.4e+05 4.05e+05 2.86e+05 2.27e+05 2.19e+05 2.9e+05 1.73e+05 3.16e+05 0 2.3e+05 1.35e+05 2.27e+05 1.05e+05 2.34e+05 2.05e+05 2.13e+05 1.69e+05 2.51e+05 2.07e+05 3.63e+05 3.75e+05 4.22e+05 3.78e+05 4.64e+05 5.25e+05 4.9e+05 5.34e+05 7.07e+05 6.31e+05 6.03e+05 1.34e+06 6.74e+05 7.43e+05 7.23e+05 7.57e+05 7.97e+05 7.92e+05 8.02e+05 8.82e+05 9.12e+05 9.27e+05 9.24e+05 9.57e+05 9.74e+05 1.02e+06 1.24e+06 1.05e+06 1.11e+06 1.15e+06 1.14e+06 1.2e+06 1.21e+06 1.24e+06 2.06e+06 1.27e+06 1.3e+06 1.31e+06 1.33e+06 1.43e+06 1.45e+06 1.49e+06 1.71e+06 1.51e+06 1.51e+06 1.54e+06 1.54e+06 1.58e+06 1.57e+06 1.68e+06 1.7e+06 1.7e+06 1.73e+06 1.73e+06 1.77e+06 1.78e+06 1.8e+06 1.8e+06 1.84e+06 1.83e+06 1.84e+06 1.83e+06 1.83e+06 1.77e+06 1.98e+06 1.76e+06 1.71e+06 1.7e+06 1.69e+06 1.77e+06 1.66e+06 1.65e+06 1.64e+06 1.63e+06 1.66e+06 1.62e+06 1.61e+06 1.6e+06 1.61e+06 1.6e+06 2.07e+06 1.58e+06 1.64e+06 1.51e+06 1.5e+06 1.49e+06 1.47e+06 1.42e+06 1.4e+06 1.36e+06 1.35e+06 1.38e+06 1.32e+06 1.33e+06 1.37e+06 1.27e+06 1.25e+06 1.25e+06 1.21e+06 1.26e+06 1.21e+06 1.24e+06 1.19e+06 1.21e+06 1.19e+06 1.17e+06 1.13e+06 1.15e+06 1.07e+06 1.03e+06 1.03e+06 9.85e+05 9.61e+05 9.52e+05 9.09e+05 8.91e+05 9.12e+05 8.75e+05 8.65e+05 8.54e+05 8.26e+05 8.29e+05 8.15e+05 8.03e+05 7.92e+05 7.64e+05 8.42e+05 7.49e+05 7.45e+05 7.46e+05 7.37e+05 7.11e+05 6.98e+05 6.96e+05 7.22e+05 6.73e+05 6.36e+05 6.3e+05 6.38e+05 6.1e+05 6.12e+05 6.08e+05 6.04e+05 6.03e+05 6.25e+05 6.19e+05 5.73e+05 5.62e+05 5.63e+05 5.67e+05 5.55e+05 5.55e+05 5.39e+05 5.21e+05 5.17e+05 4.98e+05 4.97e+05 5e+05 4.7e+05 4.62e+05 4.59e+05 4.46e+05 4.46e+05 6.09e+05 4.64e+05 3.73e+05 3.62e+05 3.53e+05 3.48e+05 3.49e+05 3.41e+05 3.26e+05 3.23e+05 2.79e+05 4.73e+05 2.66e+05 2.41e+05 2.35e+05 2.5e+05 2.42e+05 2.01e+05 1.82e+05 1.75e+05 2.14e+05 1.61e+05 1.3e+05 1.76e+05 6.24e+05 1.09e+05 8.92e+04 2.08e+05 1.96e+05 4.5e+04 7.46e+04 2.98e+04 7.94e+04 0 6.21e+04 1.11e+05 2.46e+04 1.45e+05 5.61e+04 1.2e+05 8.65e+04 6.74e+04 7.73e+04 7.31e+04 1.17e+05 1.12e+05 9.43e+04 1.84e+05 1.42e+05 1.89e+05 1.15e+05 1.15e+05 1.89e+05 1.44e+05 1.78e+05 1.59e+05 1.64e+05 1.64e+05 1.78e+05 1.87e+05 2.05e+05 2.1e+05 2.44e+05 4.99e+05 2.19e+05 2.26e+05 2.29e+05 2.55e+05 3.3e+05 2.61e+05 2.58e+05 2.72e+05 2.86e+05 2.97e+05 3e+05 3.09e+05 3.36e+05 5.22e+05 4.05e+05 4.02e+05 4.05e+05 4.19e+05 4.62e+05 5.56e+05 4.5e+05 4.64e+05 4.84e+05 4.76e+05 4.68e+05 4.89e+05 4.94e+05 4.84e+05 5.08e+05 5.03e+05 5.27e+05 5.18e+05 5.31e+05 5.37e+05 5.43e+05 5.46e+05 5.69e+05 5.58e+05 5.66e+05 5.87e+05 6.03e+05 6.1e+05 6.19e+05 6.12e+05 6.29e+05 6.27e+05 6.26e+05 6.43e+05 6.52e+05 6.74e+05 6.81e+05 6.81e+05 6.91e+05 7.03e+05 7.35e+05 7.23e+05 7.22e+05 7.23e+05 7.43e+05 7.39e+05 7.4e+05 7.55e+05 7.71e+05 7.68e+05 7.73e+05 7.66e+05 8.12e+05 7.99e+05 8.19e+05 8.3e+05 8.27e+05 8.24e+05 8.3e+05 8.37e+05 8.36e+05 8.36e+05 8.59e+05 8.64e+05 8.61e+05 8.73e+05 8.87e+05 9.07e+05 8.95e+05 9.01e+05 9.14e+05 9.15e+05 9.13e+05 9.29e+05 9.3e+05 9.44e+05 9.49e+05 9.6e+05 9.51e+05 9.64e+05 9.63e+05 9.64e+05 9.72e+05 9.71e+05 9.7e+05 9.78e+05 9.84e+05 swp new 26 <-> 21 old; decreasing diss. 2.23105e+09 by -3.38117e+07 swp new 115 <-> 129 old; decreasing diss. 2.19724e+09 by -2.21283e+07 swp new 210 <-> 217 old; decreasing diss. 2.17511e+09 by -724555 swp new 111 <-> 115 old; decreasing diss. 2.17439e+09 by -119224 swp new 8 <-> 9 old; decreasing diss. 2.17427e+09 by -8711.59 end{bswap()}, end{cstat()}

[2020-11-05 16:17:43 rtemis::u.PAM] Run completed in 0.01 minutes (Real: 0.50; User: 0.48; System: 2e-03)

tbl_vendors <-
  tbl_vendors %>%
  mutate(pam_cluster = as.character(clusters$clusters.train),
         .before = "id_duns")

Now we can build the trelliscope. We need to build a nested table with the cumulative awards for each of the Phase III awardees.

tbl_cumulative <-
  df_all %>%
  filter(id_duns %in% c(tbl_vendors$id_duns)) %>%
  group_by(id_duns, date_obligation) %>%
  summarise(amount = sum(amount_obligation)) %>%
  mutate(amount_cumulative = cumsum(amount)) %>%
  ungroup() %>%
  group_by(id_duns) %>%
  nest() %>%
  ungroup()

Lets see if it worked

tbl_cumulative$data[[7]] %>% plot_gg_cumulative_area(plot_title = "Test")

Now we can build the trelliscope using the function we built to plot the cumulative area for each of the companies. We also want to join in all the features we want to explore [this also won’t render in the final version of the markdown]

tbl_trelliscope_data <-
  tbl_vendors %>%
  left_join(tbl_cumulative, by = "id_duns") %>%
  left_join(tbl_first %>% select(id_duns, department_first, agency_first, office_first),
            by = "id_duns") %>%
  mutate_at(c("year_recent", "year_first"), as.character)
trell_sample <- tbl_trelliscope_data %>%
  sample_n(10) %>%
  mutate(amount = round(amount / 1000000, digits = 2),
         panel = map_plot(data,
                          function(data) {
                            plot_gg_cumulative_area(
                              data = data,
                              plot_title = "Cumulative Phase IIIs",
                              use_lm = T,
                              use_loess = T
                            )
                          })) %>%
  arrange(desc(amount)) %>%
  trelliscope(
    name = "Phase III Cumulative Awardee Trelliscope",
    desc = "Minimum of 2 Phase III contracts",
    nrow = 1,
    ncol = 2,
    width = 500,
    height = 500,
    state = list(
      labels = c(
        "vendor",
        "amount",
        "contracts",
        'date_first',
        "date_recent",
        "pam_cluster",
        "department_first",
        "agency_first",
        "office_first"
      ),
      sort_spec(name = "amount", dir = "desc")
    )
  )
trell_sample

Looks good, now lets build the full trelliscope.

phase_3_trelliscope <-
  tbl_trelliscope_data %>%
  mutate(amount = round(amount / 1000000, digits = 2),
         panel = map_plot(data,
                          function(data) {
                            plot_gg_cumulative_area(
                              data = data,
                              plot_title = "Cumulative Phase IIIs",
                              use_lm = T,
                              use_loess = T
                            )
                          })) %>%
  arrange(desc(amount)) %>%
  trelliscope(
    name = "Phase III Cumulative Awardee Trelliscope",
    desc = "Minimum of 2 Phase III contracts",
    nrow = 1,
    ncol = 2,
    width = 500,
    height = 500,
    state = list(
      labels = c(
        "vendor",
        "amount",
        "contracts",
        'date_first',
        "date_recent",
        "pam_cluster",
        "department_first",
        "agency_first",
        "office_first"
      ),
      sort_spec(name = "amount", dir = "desc")
    )
  )
phase_3_trelliscope

Part V: Exploring the AFWERX Portfolio

Now lets explore the AFWERX portfolio.

AFWERX Portfolio Data

First lets bring in the data

df_afwerx <- sbir_afwerx_portfolio(use_cached = T)

Now lets look at the data

df_afwerx %>% glimpse()

Rows: 1,759 Columns: 51 $ date_updated 2020-10-31, 202… $ id 1, 2, 3, 4, 5, 6… $ id_phase “PHASE I”, “PHAS… $ type_solicitation ”OPEN TOPIC“,”O… $ name_solicitation “AETC OPEN TOPIC… $ group_solicitation ”AF191-004“,”AF… $ id_office “AF191”, “AF191”… $ id_cohort “004”, “004”, “0… $ id_program ”SBIR“,”SBIR“, … $ is_open_topic_awardee TRUE, TRUE, TRUE… $ name_company_clean ”BRANCH TECHNOLO… $ name_company “BRANCH TECHNOLO… $ url_company https://www.bra… $ slug_soundex_company “B652325420000”,… $ keyword_afwerx “AUTONOMOUS ROBO… $ keyword_company_clean_afwerx ”AUTONOMOUS SYST… $ group_topic “AF191”, “AF191”… $ name_company_legal_clean “BRANCH TECHNOLO… $ id_duns 79377382, 799165… $ slug_cage ”74NH7“,”7WUZ2“… $ date_registration_initial 2014-05-01, 201… $ date_expiration 2021-04-15, 202… $ date_last_update 2020-04-15, 202… $ date_activation 2020-04-15, 202… $ name_company_legal ”BRANCH TECHNOLO… $ name_company_dbasam “BRANCH TECHNOLO… $ name_company_division NA, NA, NA, NA, … $ date_company_start 2014-04-15, 201… $ url_company_sam https://www.bra… $ email_point_of_contact_electronic_business … $ url_point_of_contact https://branch.… $ email_point_of_contact_electronic_business_alternate … $ location_company ”1530 RIVERSIDE … $ name_company_sam “BRANCH TECHNOLO… $ has_duns TRUE, TRUE, TRUE… $ id_naics_primary 236220, 541714, … $ name_naics_primary ”COMMERCIAL AND … $ name_industry_group_naics "RESIDENTIAL BUI… $ count_naics 6, 3, 3, 1, 1, 1… $ count_naics_exceptions 0, 1, 2, 0, 0, 0… $ is_primary_naics FALSE, FALSE, FA… $ has_naics TRUE, TRUE, TRUE… $ count_product_service_codes NA, NA, 10, 1, N… $ has_product_service_codes TRUE, TRUE, TRUE… $ count_business_types 2, 1, 3, 3, 2, 5… $ has_business_types TRUE, TRUE, TRUE… $ has_sba_types TRUE, TRUE, TRUE… $ date_sam_data 2020-10-04, 202… $ data_psc [NULL, NULL, <t… $ data_naics [<tbl_df[4 x 3]… $ data_business_types [<tbl_df[4 x 2]…

df_afwerx %>% skim()
Data summary
Name Piped data
Number of rows 1759
Number of columns 51
_______________________
Column type frequency:
character 27
Date 7
list 3
logical 7
numeric 7
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
id_phase 0 1.00 7 8 0 2 0
type_solicitation 0 1.00 5 10 0 3 0
name_solicitation 381 0.78 6 27 0 18 0
group_solicitation 0 1.00 9 12 0 24 0
id_office 0 1.00 4 6 0 9 0
id_cohort 0 1.00 3 5 0 14 0
id_program 0 1.00 4 4 0 2 0
name_company_clean 1 1.00 3 49 0 1325 0
name_company 1 1.00 3 50 0 1343 0
url_company 114 0.94 13 44 0 1211 0
slug_soundex_company 1 1.00 5 16 0 1314 0
keyword_afwerx 1 1.00 0 1502 3 1716 0
keyword_company_clean_afwerx 1 1.00 0 1018 3 1714 0
group_topic 0 1.00 4 6 0 9 0
name_company_legal_clean 487 0.72 6 49 0 881 0
slug_cage 79 0.96 5 5 0 1249 0
name_company_legal 79 0.96 6 63 0 1249 0
name_company_dbasam 1442 0.18 2 50 0 238 0
name_company_division 1431 0.19 3 48 0 240 0
url_company_sam 543 0.69 13 44 0 892 0
email_point_of_contact_electronic_business 79 0.96 9 43 0 1247 0
url_point_of_contact 79 0.96 13 39 0 1182 0
email_point_of_contact_electronic_business_alternate 1104 0.37 11 40 0 449 0
location_company 79 0.96 24 70 0 1232 0
name_company_sam 79 0.96 6 61 0 1249 0
name_naics_primary 81 0.95 13 118 0 158 0
name_industry_group_naics 83 0.95 16 92 0 101 0

Variable type: Date

skim_variable n_missing complete_rate min max median n_unique
date_updated 0 1.00 2020-10-31 2020-10-31 2020-10-31 1
date_registration_initial 79 0.96 2001-04-03 2020-09-13 2016-11-24 973
date_expiration 79 0.96 2020-05-20 2021-10-03 2021-05-13 336
date_last_update 79 0.96 2019-10-07 2020-10-03 2020-06-04 297
date_activation 79 0.96 2019-04-08 2020-10-03 2020-05-18 323
date_company_start 79 0.96 1947-09-01 2020-03-09 2014-11-25 1052
date_sam_data 79 0.96 2020-10-04 2020-10-04 2020-10-04 1

Variable type: list

skim_variable n_missing complete_rate n_unique min_length max_length
data_psc 0 1 545 0 4
data_naics 0 1 314 0 3
data_business_types 0 1 221 0 2

Variable type: logical

skim_variable n_missing complete_rate mean count
is_open_topic_awardee 0 1 0.92 TRU: 1623, FAL: 136
has_duns 0 1 0.96 TRU: 1694, FAL: 65
is_primary_naics 0 1 0.43 FAL: 1005, TRU: 754
has_naics 0 1 1.00 TRU: 1759
has_product_service_codes 0 1 1.00 TRU: 1759
has_business_types 0 1 1.00 TRU: 1759
has_sba_types 0 1 1.00 TRU: 1759

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
id 0 1.00 880.00 507.92 1 440.5 880 1319.5 1759 ▇▇▇▇▇
id_duns 65 0.96 200946949.03 273975776.72 1141399 79645983.8 81010137 117238694.8 969270474 ▇▁▁▁▁
id_naics_primary 81 0.95 493529.88 95212.35 213112 511210.0 541511 541715.0 927110 ▂▁▇▁▁
count_naics 81 0.95 5.25 6.11 1 2.0 3 6.0 67 ▇▁▁▁▁
count_naics_exceptions 81 0.95 1.04 0.98 0 0.0 1 2.0 7 ▇▂▁▁▁
count_product_service_codes 911 0.48 7.95 14.31 1 1.0 4 9.0 237 ▇▁▁▁▁
count_business_types 79 0.96 2.60 1.58 1 1.0 2 3.0 10 ▇▃▂▁▁

AFWERX Counts

Lets look at some quick counts:

First by phase:

df_afwerx %>%
  count(id_program, id_phase, sort = T) %>% 
  gt()
id_program id_phase n
SBIR PHASE I 1418
SBIR PHASE II 186
STTR PHASE I 155

Next by solicitation type:

df_afwerx %>%
  count(id_phase, type_solicitation, sort = T) %>% 
  gt()
id_phase type_solicitation n
PHASE I OPEN TOPIC 1056
PHASE I OTHER 444
PHASE II OPEN TOPIC 186
PHASE I PITCH DAY 73

Next by solicitation:

df_afwerx %>%
  count(
    id_phase,
    id_program,
    type_solicitation,
    group_solicitation,
    name_solicitation,
    sort = T
  ) %>% 
  gt()
id_phase id_program type_solicitation group_solicitation name_solicitation n
PHASE I SBIR OTHER X201-CSO1 NA 381
PHASE I SBIR OPEN TOPIC AF192-001 OPEN TOPIC 322
PHASE I SBIR OPEN TOPIC AF193-CSO1 SBIR OPEN TOPIC 286
PHASE I SBIR OPEN TOPIC AF191-005 AFWERX OPEN TOPIC 102
PHASE I SBIR OPEN TOPIC AF183-005 AFWERX OPEN TOPIC 99
PHASE I STTR OPEN TOPIC AFX20A-TCSO1 STTR OPEN TOPIC 88
PHASE II SBIR OPEN TOPIC AF192-D001 D2P2 OPEN TOPIC 69
PHASE II SBIR OPEN TOPIC AF183-005 AFWERX OPEN TOPIC 65
PHASE I SBIR OPEN TOPIC AF182-005 AFWERX OPEN TOPIC 52
PHASE II SBIR OPEN TOPIC AF191-005 OPEN TOPIC 43
PHASE I STTR OPEN TOPIC AF19C-T010 STTR OPEN TOPIC 34
PHASE I STTR OPEN TOPIC AF19B-T001 STTR OPEN TOPIC 33
PHASE I SBIR OPEN TOPIC AF183-006 AETC OPEN TOPIC 20
PHASE I SBIR OPEN TOPIC AF191-004 AETC OPEN TOPIC 20
PHASE I SBIR PITCH DAY AF191-010 BATTLEFIELD OPS PITCH DAY 19
PHASE I SBIR PITCH DAY AF191-011 PEO DIGITAL PITCH DAY 17
PHASE I SBIR PITCH DAY AF191-009 C3I&N PITCH DAY 16
PHASE I SBIR PITCH DAY AF192-006 UAS PITCH DAY 13
PHASE I SBIR OTHER AF191-003 MD5 MATERIALS 11
PHASE I SBIR OTHER AF183-001 MD5 MANNED-UNMANNED TEAMING 10
PHASE I SBIR OTHER AF183-002 MD5 MACHINE LEARNING 9
PHASE I SBIR OTHER AF191-001 MD5 C2 9
PHASE II SBIR OPEN TOPIC AF183-006 AETC OPEN TOPIC 9
PHASE I SBIR OTHER AF183-003 MD5 CYBER SECURITY 8
PHASE I SBIR OTHER AF183-004 MD5 MEDICAL 8
PHASE I SBIR OTHER AF191-002 MD5 SPACE 8
PHASE I SBIR PITCH DAY AF192-005 KESSEL RUN PITCH DAY 8

AFWERX Company Keyword Analysis

Now lets explore some of the keywords.

tbl_company_keywords <- df_afwerx %>%
  select(
    id_phase,
    id_program,
    type_solicitation,
    group_solicitation,
    name_solicitation,
    name_company_clean,
    keyword_company_clean_afwerx
  ) %>%
  separate_rows(keyword_company_clean_afwerx, sep = "\\|") %>%
  mutate_all(str_squish)

Now lets count the keywords

tbl_af_keywords <- 
  tbl_company_keywords %>%
  mutate_all(str_squish) %>%
  count(keyword_company_clean_afwerx, sort = T)

Here are all the keywords and their count

af_keywords <- reactable(tbl_af_keywords, filterable = T, searchable = T)
af_keywords

Now lets explore the top 50 keywords.

gg_top_50 <- 
  tbl_af_keywords %>%
  slice(1:50) %>%
  mutate(keyword_company_clean_afwerx = fct_reorder(keyword_company_clean_afwerx, n)) %>% 
  ggplot(aes(x = keyword_company_clean_afwerx, y = n)) +
  geom_bar(
    stat = "identity",
    fill = "#f68060",
    alpha = .6,
    width = .4
  ) +
  coord_flip() +
  xlab("") +
  theme_bw() +
  scale_y_log10() +
  geom_text(aes(label = n),
            position = position_dodge(width = 0.9),
            vjust = -0.25) +
  ggtitle("Top 50 Keywords for AFWERX Companies")
gg_top_50

AFWERX Cohort Keyword Log Odds Analysis

Now lets take a look at which keywords associate with which cohorts disproportionately.

First lets build the count.

tbl_keyword_counts <- tbl_company_keywords %>%
  count(type_solicitation, keyword_company_clean_afwerx, sort = T)

Now lets explore the density of the count.

mplot3.x(x = tbl_keyword_counts$n)

Really skews towards low keyword count. Lets make sure the keyword shows up atleast 5 times.

tbl_keyword_counts %>%
  filter(n >= 5) %>%
  pull(n) %>%
  mplot3.x(x = .)

This looks better!

Lets look at the log odds table now.

tbl_lo <-
  tbl_keyword_counts %>%
  filter(n >= 5) %>%
  tidylo::bind_log_odds(set = type_solicitation, feature = keyword_company_clean_afwerx, n = n) %>%
  arrange(desc(log_odds_weighted))

Now lets look at the table

reactable(tbl_lo, filterable = T, sortable = T, searchable = T)

Which AFWERX Words Are Counted Together?

Now lets use some pairwise analysis conditioned on company to see which words often appear together.

tbl_company_keywords %>%
  pairwise_count(feature = name_company_clean, item = keyword_company_clean_afwerx) %>%
  arrange(desc(n)) %>% 
  reactable(filterable = T, searchable = T)
Which AFWERX Companies are Very Similar Based Upon Keywords?

Now lets look at highly correlated companies using pairwise correlations conditioning keyword againt company.

hc_cor_key <- 
  tbl_company_keywords %>%
  count(name_company_clean, keyword_company_clean_afwerx) %>%
  pairwise_cor(item = name_company_clean, feature = keyword_company_clean_afwerx) %>%
  filter(correlation > .70) %>%
  graph_from_data_frame() %>%
  hchart() %>% 
  hc_add_theme(hc_theme_darkunica()) %>% 
  hc_title(text = "Highly Correlated Afwerx Portfolio Company by Keywords") %>% 
  hc_xAxis(visible = F) %>% 
  hc_yAxis(visible = F)
hc_cor_key

Part VI: The 2021 Defense Budget

Finally lets explore the 2021 Department of Defense Budget.

Acquire 2021 Budget Data

First step is to acquire the data.

df_budget <- dod_years_budgets(budget_years = 2021, snake_names = T)

[1] “FY 2019” “FY 2020” “FY 2021”
[4] “FY 2020 OCO ENACTMENT” “FY 2020 EMERGENCY” “FY 2021 OCO”
[1] “EXHIBIT M-1” “FY 2019 (BASE + OCO)”
[3] “FY 2020 BASE ENACTED” “FY 2020 EMERGENCY”
[5] “FY 2020 OCO ENACTED” “FY 2020 TOTAL ENACTED (BASE+EME” [7] “FY 2021 BASE” “FY 2021 OCO FOR BASE REQUIREMEN” [9] “FY 2021 OCO FOR DIRECT WAR AND” “FY 2021 TOTAL OCO”
[11] “FY 2021 TOTAL (BASE + OCO)”
[1] “O&M TITLE PLUS INDEFINITE” “O&M TITLE”
[3] “INDEFINITE ACCOUNTS” “FY 2019 (BASE + OCO)”
[5] “FY 2020 BASE ENACTED” “FY 2020 EMERGENCY”
[7] “FY 2020 OCO ENACTED” “FY 2020 TOTAL ENACTED (BASE+EME” [9] “FY 2021 BASE” “FY 2021 OCO FOR BASE REQUIREMEN” [11] “FY 2021 OCO FOR DIRECT WAR AND” “FY 2021 TOTAL OCO”
[13] “FY 2021 TOTAL (BASE + OCO)”
[1] “EXHIBIT P-1” “FY 2019 (BASE + OCO)”
[3] “FY 2020 BASE ENACTED” “FY 2020 EMERGENCY”
[5] “FY 2020 OCO ENACTED” “FY 2020 TOTAL ENACTED (BASE+EME” [7] “FY 2021 BASE” “FY 2021 OCO FOR BASE REQUIREMEN” [9] “FY 2021 OCO FOR DIRECT WAR AND” “FY 2021 TOTAL OCO”
[11] “FY 2021 TOTAL (BASE + OCO)”
[1] “EXHIBIT P-1R” “FY 2019 (BASE + OCO)”
[3] “FY 2020 BASE ENACTED” “FY 2020 EMERGENCY”
[5] “FY 2020 OCO ENACTED” “FY 2020 TOTAL ENACTED (BASE+EME” [7] “FY 2021 BASE” “FY 2021 OCO FOR BASE REQUIREMEN” [9] “FY 2021 OCO FOR DIRECT WAR AND” “FY 2021 TOTAL OCO”
[11] “FY 2021 TOTAL (BASE + OCO)”
[1] “EXHIBIT R-1” “FY 2019 (BASE + OCO)”
[3] “FY 2020 BASE ENACTED” “FY 2020 EMERGENCY”
[5] “FY 2020 OCO ENACTED” “FY 2020 TOTAL ENACTED (BASE+EME” [7] “FY 2021 BASE” “FY 2021 OCO FOR BASE REQUIREMEN” [9] “FY 2021 OCO FOR DIRECT WAR AND” “FY 2021 TOTAL OCO”
[11] “FY 2021 TOTAL (BASE + OCO)”
[1] “RF TITLE” “FY 2019 (BASE + OCO)”
[3] “FY 2020 BASE ENACTED” “FY 2020 EMERGENCY”
[5] “FY 2020 OCO ENACTED” “FY 2020 TOTAL ENACTED (BASE+EME” [7] “FY 2021 BASE” “FY 2021 OCO FOR BASE REQUIREMEN” [9] “FY 2021 OCO FOR DIRECT WAR AND” “FY 2021 TOTAL OCO”
[11] “FY 2021 TOTAL (BASE + OCO)”

Lets take a look at it.

skim(df_budget)
Data summary
Name df_budget
Number of rows 3257
Number of columns 65
_______________________
Column type frequency:
character 44
logical 4
numeric 15
POSIXct 2
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
id_federal_account 0 1.00 8 8 0 99 0
slug_treasury_agency 0 1.00 1 1 0 4 0
name_agency_cgac 0 1.00 21 27 0 4 0
slug_dod_budget_group 0 1.00 2 3 0 7 0
name_dod_budget_group 0 1.00 4 29 0 7 0
slug_classification 0 1.00 1 1 0 1 0
type_budget 0 1.00 3 4 0 2 0
type_budget_sub 2781 0.15 0 29 14 3 0
slug_budget_parent 4 1.00 2 2 0 11 0
code_account_omb 0 1.00 4 6 0 97 0
name_account_omb_clean 0 1.00 4 69 0 35 0
name_account_omb 0 1.00 4 45 0 45 0
slug_organization 24 0.99 2 7 0 39 0
slug_organization_account 0 1.00 1 13 0 37 0
name_budget_parent_actual 0 1.00 5 48 0 115 0
name_budget_parent 0 1.00 5 48 0 108 0
name_budget_parent_detail 3065 0.06 2 18 0 9 0
slug_budget_activity 1141 0.65 2 3 0 132 0
name_budget_activity_actual 0 1.00 2 80 0 1150 0
name_budget_activity 0 1.00 2 80 0 1118 0
name_budget_activity_detail 3058 0.06 3 44 0 58 0
code_program_element 336 0.90 1 12 0 2132 0
name_program_element_actual 0 1.00 2 80 0 1943 0
name_program_element 0 1.00 2 80 0 1941 0
name_location 3047 0.06 4 45 0 84 0
name_construction_project 3047 0.06 8 12 0 194 0
name_state_country 3047 0.06 4 21 0 48 0
url_budget_dod_file 0 1.00 77 78 0 7 0
id_line_number 811 0.75 1 3 0 333 0
slug_cost 1976 0.39 1 1 0 12 0
type_cost 1976 0.39 1 35 0 14 0
slug_state_country 3047 0.06 2 2 0 48 0
detail_program_element 3237 0.01 1 3 0 4 0
name_bureau_omb 783 0.76 21 22 0 3 0
name_agency 81 0.98 21 27 0 4 0
name_account 81 0.98 23 87 0 90 0
name_function_budget 81 0.98 16 16 0 1 0
name_sub_function 81 0.98 30 30 0 1 0
type_obligation_period 3242 0.00 1 1 0 1 0
slug_account 81 0.98 7 11 0 90 0
code_financial_reporting_entity 81 0.98 4 4 0 1 0
name_financial_reporting_entity 81 0.98 21 21 0 1 0
id_financial_indicator 3159 0.03 1 1 0 1 0
type_financial_indicator 3159 0.03 7 7 0 1 0

Variable type: logical

skim_variable n_missing complete_rate mean count
is_budget_current_year 0 1.00 1.00 TRU: 3257
is_added 1873 0.42 0.00 FAL: 1384
is_toa 1505 0.54 0.99 TRU: 1727, FAL: 25
is_active 81 0.98 1.00 TRU: 3176

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
year_budget 0 1.00 2021.00 0.00 2021 2021.0 2021 2021.0 2021 ▁▁▇▁▁
id_cgac 0 1.00 39.38 28.55 17 17.0 21 57.0 97 ▇▁▃▁▂
amount_item 0 1.00 220420806.88 972864370.59 -6227811000 6880000.0 29593000 121394000.0 21086112000 ▁▇▁▁▁
count_item 3081 0.05 314.79 1111.74 1 5.0 21 98.5 10000 ▇▁▁▁▁
amount_unit_cost 3081 0.05 107281719.99 1344535886.78 -6227811000 195429.1 4139316 23168291.7 14393443000 ▁▇▁▁▁
number_account 81 0.98 69056.78 3474.11 7647 69276.0 69300 69322.0 70978 ▁▁▁▁▇
id_allocation_omb 3257 0.00 NaN NA NA NA NA NA NA
id_agency 81 0.98 39.01 28.77 17 17.0 21 57.0 97 ▇▁▃▁▂
period_available_begining 96 0.97 2017.00 0.10 2013 2017.0 2017 2017.0 2017 ▁▁▁▁▇
period_available_end 96 0.97 2018.64 1.06 2013 2018.0 2019 2019.0 2021 ▁▁▂▇▁
id_account_omb 81 0.98 1882.76 986.49 100 1319.0 2020 2065.0 5189 ▃▇▁▂▁
id_sub_account_omb 81 0.98 0.00 0.05 0 0.0 0 0.0 2 ▇▁▁▁▁
code_bureau_omb 81 0.98 0.00 0.00 0 0.0 0 0.0 0 ▁▁▇▁▁
code_function_budget 81 0.98 50.00 0.00 50 50.0 50 50.0 50 ▁▁▇▁▁
code_sub_function 81 0.98 51.00 0.00 51 51.0 51 51.0 51 ▁▁▇▁▁

Variable type: POSIXct

skim_variable n_missing complete_rate min max median n_unique
datetime_program_established 81 0.98 1987-10-01 2016-10-01 2016-10-01 7
datetime_program_ended 3257 0.00 NA NA NA 0
glimpse(df_budget)

Rows: 3,257 Columns: 65 $ id_federal_account “021-0720”, “021-2050”, “021-0720”, “… $ year_budget 2021, 2021, 2021, 2021, 2021, 2021, 2… $ slug_treasury_agency ”A“,”A“,”A“,”A“,”A“,”N“,”A“,”N… $ name_agency_cgac “DEPARTMENT OF THE ARMY”, “DEPARTMENT… $ id_cgac 21, 21, 21, 21, 21, 17, 21, 17, 21, 1… $ is_budget_current_year TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, T… $ slug_dod_budget_group ”C1“,”C1“,”C1“,”C1“,”C1“,”C1“,”… $ name_dod_budget_group “MILITARY CONSTRUCTION”, “MILITARY CO… $ slug_classification ”U“,”U“,”U“,”U“,”U“,”U“,”U“,”U… $ type_budget “BASE”, “OCO”, “BASE”, “OCO”, “BASE”,… $ type_budget_sub NA, "“, NA,”“, NA,”“, NA,”“, NA,”… $ slug_budget_parent “01”, “02”, “01”, “03”, “04”, “01”, “… $ code_account_omb ”0720“,”2050“,”0720“,”2050“,”0720… $ name_account_omb_clean “FAMILY HOUSING CONSTRUCTION”, “MILIT… $ name_account_omb ”FAM HSG CON“,”MILCON“,”FAM HSG CON… $ slug_organization “ARMY”, “ARMY”, “ARMY”, “ARMY”, “ARMY… $ slug_organization_account ”ARMY“,”ARMY“,”ARMY“,”ARMY“,”ARMY… $ name_budget_parent_actual “NEW CONSTRUCTION”, “MINOR CONSTRUCTI… $ name_budget_parent ”NEW CONSTRUCTION“,”MINOR CONSTRUCTI… $ name_budget_parent_detail NA, NA, NA, NA, NA, NA, NA, NA, NA, N… $ slug_budget_activity NA, NA, NA, NA, NA, NA, NA, NA, NA, N… $ name_budget_activity_actual “FAM HSG DEWELLINGS”, “MILCON UNSPECI… $ name_budget_activity ”FAM HSG DEWELLINGS“,”MILCON UNSPECI… $ name_budget_activity_detail NA, NA, NA, NA, NA, NA, NA, NA, NA, N… $ code_program_element “09133000”, “09614800”, “07788100”, “… $ name_program_element_actual ”FAMILY HOUSING NEW CONSTRUCTION“,”E… $ name_program_element “FAMILY HOUSING NEW CONSTRUCTION”, “E… $ name_location ”VICENZA“,”UNSPECIFIED WORLDWIDE LOC… $ name_construction_project “09133000”, “09614800”, “07788100”, “… $ name_state_country ”ITALY“,”WORLDWIDE UNSPECIFIED“,”KW… $ is_added NA, NA, NA, NA, NA, NA, NA, NA, NA, N… $ is_toa NA, NA, NA, NA, NA, NA, NA, NA, NA, N… $ url_budget_dod_file https://comptroller.defense.gov/port… $ id_line_number NA, NA, NA, NA, NA, NA, NA, NA, NA, N… $ slug_cost NA, NA, NA, NA, NA, NA, NA, NA, NA, N… $ type_cost NA, NA, NA, NA, NA, NA, NA, NA, NA, N… $ slug_state_country ”IT“,”ZU“,”KW“,”ZU“,”ZU“,”SP“,”… $ amount_item 84100000, 3970000, 32000000, 11903000… $ count_item NA, NA, NA, NA, NA, NA, NA, NA, NA, N… $ amount_unit_cost NA, NA, NA, NA, NA, NA, NA, NA, NA, N… $ detail_program_element NA, NA, NA, NA, NA, NA, NA, NA, NA, N… $ is_active TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, T… $ name_bureau_omb “DEPARTMENT OF THE ARMY”, “DEPARTMENT… $ name_agency ”DEPARTMENT OF THE ARMY“,”DEPARTMENT… $ name_account “FAMILY HOUSING CONSTRUCTION, ARMY”, … $ name_function_budget “NATIONAL DEFENSE”, “NATIONAL DEFENSE… $ name_sub_function ”DEPARTMENT OF DEFENSE-MILITARY“,”DE… $ number_account 69302, 69305, 69302, 69305, 69302, 69… $ id_allocation_omb NA, NA, NA, NA, NA, NA, NA, NA, NA, N… $ id_agency 21, 21, 21, 21, 21, 17, 21, 17, 21, 1… $ period_available_begining 2017, 2017, 2017, 2017, 2017, 2017, 2… $ period_available_end 2021, 2021, 2021, 2021, 2021, 2021, 2… $ type_obligation_period NA, NA, NA, NA, NA, NA, NA, NA, NA, N… $ id_account_omb 720, 2050, 720, 2050, 720, 1205, 2050… $ id_sub_account_omb 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0… $ slug_account “2117/210720”, “2117/212050”, “2117/2… $ code_bureau_omb 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0… $ code_financial_reporting_entity ”DE00“,”DE00“,”DE00“,”DE00“,”DE00… $ name_financial_reporting_entity “DEPARTMENT OF DEFENSE”, "DEPARTMENT … $ id_financial_indicator NA, NA, NA, NA, NA, NA, NA, NA, NA, N… $ type_financial_indicator NA, NA, NA, NA, NA, NA, NA, NA, NA, N… $ code_function_budget 50, 50, 50, 50, 50, 50, 50, 50, 50, 5… $ code_sub_function 51, 51, 51, 51, 51, 51, 51, 51, 51, 5… $ datetime_program_established 2016-10-01, 2016-10-01, 2016-10-01, … $ datetime_program_ended NA, NA, NA, NA, NA, NA, NA, NA, NA, …

Macro Exploration of the Budget

Lets explore the breakdown by agency:

df_budget %>%
  count(name_agency_cgac,
        wt = amount_item,
        name = "amount",
        sort = T) %>%
  munge_data() %>%
  gt()
name_agency_cgac amount
DEPARTMENT OF THE NAVY $212,913,717,000
DEPARTMENT OF THE AIR FORCE $208,780,303,000
DEPARTMENT OF THE ARMY $180,378,777,000
DEPARTMENT OF DEFENSE $115,837,771,000

Now let’s take a look at breakdown by Agency and Budget Group:

df_budget %>%
  count(
    name_dod_budget_group,
    name_agency_cgac,

    wt = amount_item,
    name = "amount",
    sort = T
  ) %>%
  munge_data() %>%
  gt()
name_dod_budget_group name_agency_cgac amount
OPERATION AND MAINTENANCE DEPARTMENT OF DEFENSE $79,581,776,000
OPERATION AND MAINTENANCE DEPARTMENT OF THE ARMY $72,986,774,000
OPERATION AND MAINTENANCE DEPARTMENT OF THE NAVY $70,602,087,000
OPERATION AND MAINTENANCE DEPARTMENT OF THE AIR FORCE $65,902,610,000
MILITARY PERSONNEL DEPARTMENT OF THE ARMY $65,514,816,000
PROCUREMENT DEPARTMENT OF THE NAVY $62,251,320,000
MILITARY PERSONNEL DEPARTMENT OF THE NAVY $55,175,348,000
PROCUREMENT DEPARTMENT OF THE AIR FORCE $49,821,142,000
RDTE DEPARTMENT OF THE AIR FORCE $47,724,725,000
MILITARY PERSONNEL DEPARTMENT OF THE AIR FORCE $42,809,274,000
RDTE DEPARTMENT OF DEFENSE $25,919,555,000
PROCUREMENT DEPARTMENT OF THE ARMY $24,333,664,000
RDTE DEPARTMENT OF THE NAVY $21,491,016,000
RDTE DEPARTMENT OF THE ARMY $12,885,453,000
PROCUREMENT DEPARTMENT OF DEFENSE $6,732,458,000
MILITARY CONSTRUCTION DEPARTMENT OF THE NAVY $2,810,804,000
PROCUREMENT RESERVE DEPARTMENT OF THE ARMY $2,792,431,000
MILITARY CONSTRUCTION DEPARTMENT OF DEFENSE $2,407,501,000
MILITARY CONSTRUCTION DEPARTMENT OF THE AIR FORCE $1,947,974,000
MILITARY CONSTRUCTION DEPARTMENT OF THE ARMY $1,788,832,000
REVOLVING AND MANAGEMENT FUND DEPARTMENT OF DEFENSE $1,196,481,000
PROCUREMENT RESERVE DEPARTMENT OF THE NAVY $583,142,000
PROCUREMENT RESERVE DEPARTMENT OF THE AIR FORCE $478,866,000
REVOLVING AND MANAGEMENT FUND DEPARTMENT OF THE AIR FORCE $95,712,000
REVOLVING AND MANAGEMENT FUND DEPARTMENT OF THE ARMY $76,807,000

Which Items Are We Buying

Now lets take a look at what specific items we are looking to acquire.

tbl_items <-
  df_budget %>%
  filter(amount_unit_cost > 0, count_item > 0) %>%
  select(
    count_item,
    amount_unit_cost,
    name_program_element_actual,
    name_agency_cgac,
    slug_organization_account
  ) %>%
  distinct()

Lets first take a look at a static scatter plot.

gg_items <-
  tbl_items %>%
  ggplot(aes(x = count_item, y = amount_unit_cost, color = name_agency_cgac)) +
  geom_jitter() +
  theme_ipsum() +
  labs(title = "2021 Requested Budget Items",
       subtitle = "Untransformed and Static") +
  scale_y_continuous(labels = scales::dollar)

Lets see how this looks?

gg_items

Not great, some huge skew and it would be nice to be able to see the exact line items.

Lets build an interactive chart that does that.

hc_items <-
  hchart(
  tbl_items,
  "scatter",
  hcaes(
    x = count_item,
    y = amount_unit_cost,
    group = name_agency_cgac,
    name = name_program_element_actual
  ),
  marker = list(radius = 3, symbol = 'circle'),
  regression = TRUE
) %>%
  hc_title(text = "2021 Defense Budget Requested Items") %>%
  hc_xAxis(title = list(text = "Item Count (log10 transformed)"),
           type = "logarithmic") %>%
  hc_yAxis(title = list(text = "$ Amount Per Unit (log 10 transformed)"),
           type = "logarithmic") %>%
  hc_tooltip(
    table = TRUE,
    outside = TRUE,
    shared = TRUE,
    useHTML = TRUE,
    headerFormat = "<small>{point.key}</small><table>",
    pointFormat = str_c(
      "<tr><td style=\"color: {series.color}\">{series.name}: </td>",
      "<tr><td style=\"text-align: right\"><b>Items: {point.x:,.0f}</b></td></tr>",
      "<tr><td style=\"text-align: right\"><b>$ Per item: {point.y:,0.f}</b></td></tr>",
      "<td style=\"text-align: right\"></td>"
    ),
    style = list(fontSize = "0.7em")
  ) %>%
  hc_colors(c("#d35400", "#2980b9", "#2ecc71", "black")) %>%
  hc_add_dependency("plugins/highcharts-regression.js") %>%
  hc_add_theme(hc_theme_538())
hc_items

Explore the Whole 2021 Budget

Lets see if we can explore the whole budget interactively.

To do that we will build a hierarchical sunburst visualization.

First lets get the data we need.

treemap_columns <-
  c(
    "type_budget",
    "name_dod_budget_group",
    "name_agency_cgac",
    "name_account_omb_clean",
    "name_budget_parent",
    "name_budget_activity",
    "name_program_element",
    "slug_organization_account"
  )

tbl_treemap <- df_budget %>%
  count(
    !!!syms(treemap_columns),
    wt = amount_item,
    name = "amount",
    sort = T
  ) %>%
  filter(amount > 0)

Now we can build the interactive sunburst chart.

tm <-
  tbl_treemap %>%
  treemap(
    index = treemap_columns,
    vSize = "amount",
    draw = F,
    type = "index"
  )

tm_nest <- d3_nest(tm$tm[, c(treemap_columns,
                             "vSize",
                             "color")],
                   value_cols = c("vSize", "color"))

sun <- sund2b(
  tm_nest,
  colors = htmlwidgets::JS(# yes this is a little different, so please pay attention
    #  "function(d) {return d.color}" will not work
    "function(name, d){return d.color || '#ccc';}"),
  valueField = "vSize",
  elementId = "my-sunburst"
)

Lets See how it looks:

sun

Pivot Table

Finally let me show you how we can port this data into an interactive pivottable!

rpiv <- rpivotTable::rpivotTable(
  df_budget,
  rows = "name_agency_cgac",
  vals = "amount_item",
  aggregatorName = "Sum",
  width = "100%",
  height = "100%"
)

This wont render on markdown

rpiv

Questions and Follow Up

Looking at Transition for Anduril

This code block show cases how one can determine possible transitions using Anduril as an example

tbl_anduril <-
  fpds_atom(
    vendor_name = "ANDURIL",
    parse_contracts = T,
    snake_names = T
  )

tbl_anduril <- tbl_anduril %>%
  filter(id_duns != 103307059)

tbl_phase_2_date <-
  tbl_anduril %>%
  filter(!is.na(code_research)) %>%
  filter(code_research == "SR2") %>%
  group_by(code_research) %>%
  filter(date_obligation == min(date_obligation)) %>%
  select(
    id_duns,
    id_contract_phase_2 = id_contract_analysis,
    date_phase_2 = date_obligation,
    ageny_phase_2 = name_agency_cgac_award,
    office_award_phase_2 = name_office_award,
    office_fundinging_phase_2 = name_office_funding,
  ) %>%
  ungroup()

tbl_anduril <-
  tbl_anduril %>%
  left_join(tbl_phase_2_date, by = "id_duns")


tbl_anduril_post_phase_2 <-
  tbl_anduril %>%
  mutate(id_contract_analysis = id_contract_analysis %>% substr(1, 13)) %>%
  filter(id_contract_analysis != "FA865020C9300") %>%
  group_by(
    id_duns,
    name_vendor,
    id_contract_analysis,
    type_action,
    date_phase_2,
    type_research,
    type_inherently_government_function
  ) %>%
  summarise(
    date_award = min(date_obligation, na.rm = T),
    contract_actions = n(),
    department_award =  name_department_award[which.max(amount_obligation)],
    agency_award =  name_agency_award[which.max(amount_obligation)],
    office_award = name_office_award[which.max(amount_obligation)],
    amount_obligated = sum(amount_obligation, na.rm = T),
    .groups = "drop"
  ) %>%
  mutate(is_after_phase_2 = date_award > date_phase_2) %>%
  filter(is_after_phase_2) %>%
  ungroup() %>%
  select(-date_phase_2) %>%
  arrange(date_award) %>%
  mutate(contract_number_post_phase_2 = 1:n())

Here are the results

tbl_anduril_post_phase_2 %>%
  munge_data() %>%
  gt::gt()
id_duns name_vendor id_contract_analysis type_action type_research type_inherently_government_function date_award contract_actions department_award agency_award office_award amount_obligated is_after_phase_2 contract_number_post_phase_2
80788349 ANDURIL INDUSTRIES INC FA872620C0005 DEFINITIVE CONTRACT SMALL BUSINESS INNOVATION RESEARCH PROGRAM PHASE III ACTION OTHER FUNCTIONS 2019-11-22 5 DEPARTMENT OF DEFENSE DEPT OF THE AIR FORCE AFLCMC HNK C3IN $11,915,000 TRUE 1
80788349 ANDURIL INDUSTRIES INC FA864920P0246 PURCHASE ORDER SMALL BUSINESS INNOVATION RESEARCH PROGRAM PHASE I ACTION OTHER FUNCTIONS 2020-01-29 1 DEPARTMENT OF DEFENSE DEPT OF THE AIR FORCE USAF SBIR STTR CONTRACTING $50,000 TRUE 2
80788349 ANDURIL INDUSTRIES INC 70B02C20D0000 IDC SMALL BUSINESS INNOVATION RESEARCH PROGRAM PHASE III ACTION OTHER FUNCTIONS 2020-07-02 1 DEPARTMENT OF HOMELAND SECURITY US CUSTOMS AND BORDER PROTECTION AIR AND MARINE CTR DIV $0 TRUE 3
80788349 ANDURIL INDUSTRIES INC 70B02C20F0000 DELIVERY ORDER SMALL BUSINESS INNOVATION RESEARCH PROGRAM PHASE III ACTION OTHER FUNCTIONS 2020-07-02 2 DEPARTMENT OF HOMELAND SECURITY US CUSTOMS AND BORDER PROTECTION AIR AND MARINE CTR DIV $60,731,513 TRUE 4
Use or disclosure of data contained on this page is subject to the legend on the first page of the volume.
PW Communications, Inc.
Proprietary Information